|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9897455374097985, |
|
"eval_steps": 800, |
|
"global_step": 738, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004051145714647424, |
|
"grad_norm": 23.354017564608814, |
|
"learning_rate": 1.3513513513513514e-09, |
|
"logits/chosen": -8.401385307312012, |
|
"logits/rejected": -7.887872695922852, |
|
"logps/chosen": -22.588991165161133, |
|
"logps/rejected": -32.73207473754883, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.040511457146474236, |
|
"grad_norm": 23.005760986532493, |
|
"learning_rate": 1.3513513513513514e-08, |
|
"logits/chosen": -8.023005485534668, |
|
"logits/rejected": -7.215701580047607, |
|
"logps/chosen": -26.691545486450195, |
|
"logps/rejected": -35.018611907958984, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3958333432674408, |
|
"rewards/chosen": -0.00048815118498168886, |
|
"rewards/margins": -0.001463304040953517, |
|
"rewards/rejected": 0.0009751527686603367, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08102291429294847, |
|
"grad_norm": 25.76788201635075, |
|
"learning_rate": 2.7027027027027028e-08, |
|
"logits/chosen": -8.08788776397705, |
|
"logits/rejected": -7.492005825042725, |
|
"logps/chosen": -28.649845123291016, |
|
"logps/rejected": -38.781715393066406, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0005952938226982951, |
|
"rewards/margins": 0.00045213793055154383, |
|
"rewards/rejected": 0.0001431556447641924, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12153437143942271, |
|
"grad_norm": 24.285898108451498, |
|
"learning_rate": 4.054054054054054e-08, |
|
"logits/chosen": -8.020319938659668, |
|
"logits/rejected": -7.39060115814209, |
|
"logps/chosen": -26.554407119750977, |
|
"logps/rejected": -39.03184509277344, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0008592296508140862, |
|
"rewards/margins": 0.003104406874626875, |
|
"rewards/rejected": -0.002245177049189806, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16204582858589694, |
|
"grad_norm": 26.673301885189623, |
|
"learning_rate": 5.4054054054054056e-08, |
|
"logits/chosen": -8.143043518066406, |
|
"logits/rejected": -7.433783531188965, |
|
"logps/chosen": -25.945232391357422, |
|
"logps/rejected": -37.73704147338867, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 9.569372195983306e-05, |
|
"rewards/margins": 0.007951222360134125, |
|
"rewards/rejected": -0.007855528965592384, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2025572857323712, |
|
"grad_norm": 26.497438005541635, |
|
"learning_rate": 6.756756756756756e-08, |
|
"logits/chosen": -8.0227632522583, |
|
"logits/rejected": -7.415746212005615, |
|
"logps/chosen": -28.301464080810547, |
|
"logps/rejected": -38.43585205078125, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": 0.0006854601087979972, |
|
"rewards/margins": 0.0169823057949543, |
|
"rewards/rejected": -0.01629684492945671, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24306874287884542, |
|
"grad_norm": 24.919397084889898, |
|
"learning_rate": 8.108108108108108e-08, |
|
"logits/chosen": -8.055706024169922, |
|
"logits/rejected": -7.392535209655762, |
|
"logps/chosen": -26.291152954101562, |
|
"logps/rejected": -37.135032653808594, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.0014377154875546694, |
|
"rewards/margins": 0.03630940988659859, |
|
"rewards/rejected": -0.03487168997526169, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28358020002531964, |
|
"grad_norm": 20.09272984199583, |
|
"learning_rate": 9.45945945945946e-08, |
|
"logits/chosen": -8.232405662536621, |
|
"logits/rejected": -7.535184383392334, |
|
"logps/chosen": -27.17940902709961, |
|
"logps/rejected": -36.42096710205078, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0027192712295800447, |
|
"rewards/margins": 0.06449674069881439, |
|
"rewards/rejected": -0.06177746504545212, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3240916571717939, |
|
"grad_norm": 23.04151710177534, |
|
"learning_rate": 9.997985455197112e-08, |
|
"logits/chosen": -8.351137161254883, |
|
"logits/rejected": -7.78473424911499, |
|
"logps/chosen": -26.988977432250977, |
|
"logps/rejected": -36.72052001953125, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": 0.00015634755254723132, |
|
"rewards/margins": 0.08445531874895096, |
|
"rewards/rejected": -0.08429896831512451, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36460311431826814, |
|
"grad_norm": 24.86811296840432, |
|
"learning_rate": 9.98568022639826e-08, |
|
"logits/chosen": -8.316564559936523, |
|
"logits/rejected": -7.610459327697754, |
|
"logps/chosen": -28.134241104125977, |
|
"logps/rejected": -40.095638275146484, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.005406993441283703, |
|
"rewards/margins": 0.13842421770095825, |
|
"rewards/rejected": -0.13301721215248108, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4051145714647424, |
|
"grad_norm": 23.930696194698072, |
|
"learning_rate": 9.962216467480192e-08, |
|
"logits/chosen": -8.399057388305664, |
|
"logits/rejected": -7.728206634521484, |
|
"logps/chosen": -27.389541625976562, |
|
"logps/rejected": -42.16829299926758, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.0014130814233794808, |
|
"rewards/margins": 0.17133907973766327, |
|
"rewards/rejected": -0.17275215685367584, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4456260286112166, |
|
"grad_norm": 25.12250587863022, |
|
"learning_rate": 9.927646693054496e-08, |
|
"logits/chosen": -8.546191215515137, |
|
"logits/rejected": -7.818883419036865, |
|
"logps/chosen": -26.858484268188477, |
|
"logps/rejected": -42.138423919677734, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.002900606021285057, |
|
"rewards/margins": 0.24653196334838867, |
|
"rewards/rejected": -0.24943256378173828, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48613748575769083, |
|
"grad_norm": 23.993302988236724, |
|
"learning_rate": 9.882048274282503e-08, |
|
"logits/chosen": -8.554715156555176, |
|
"logits/rejected": -7.8766679763793945, |
|
"logps/chosen": -26.874399185180664, |
|
"logps/rejected": -40.70722961425781, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.026281490921974182, |
|
"rewards/margins": 0.26420071721076965, |
|
"rewards/rejected": -0.29048219323158264, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5266489429041651, |
|
"grad_norm": 19.760387337865797, |
|
"learning_rate": 9.825523265709666e-08, |
|
"logits/chosen": -8.872574806213379, |
|
"logits/rejected": -8.05439281463623, |
|
"logps/chosen": -29.766094207763672, |
|
"logps/rejected": -39.4333381652832, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.01587745174765587, |
|
"rewards/margins": 0.3382584750652313, |
|
"rewards/rejected": -0.3541359305381775, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5671604000506393, |
|
"grad_norm": 20.405644698254697, |
|
"learning_rate": 9.758198176855646e-08, |
|
"logits/chosen": -8.83372974395752, |
|
"logits/rejected": -8.060525894165039, |
|
"logps/chosen": -27.360458374023438, |
|
"logps/rejected": -41.453636169433594, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.028619933873414993, |
|
"rewards/margins": 0.39799413084983826, |
|
"rewards/rejected": -0.42661410570144653, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6076718571971136, |
|
"grad_norm": 24.42451138080484, |
|
"learning_rate": 9.680223689071363e-08, |
|
"logits/chosen": -9.05488109588623, |
|
"logits/rejected": -8.361872673034668, |
|
"logps/chosen": -27.39214515686035, |
|
"logps/rejected": -44.14860916137695, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.041482653468847275, |
|
"rewards/margins": 0.43668460845947266, |
|
"rewards/rejected": -0.47816720604896545, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6481833143435878, |
|
"grad_norm": 22.08758150384141, |
|
"learning_rate": 9.59177431829666e-08, |
|
"logits/chosen": -9.038153648376465, |
|
"logits/rejected": -8.328339576721191, |
|
"logps/chosen": -27.219493865966797, |
|
"logps/rejected": -43.879791259765625, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.039076704531908035, |
|
"rewards/margins": 0.47056007385253906, |
|
"rewards/rejected": -0.5096367597579956, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6886947714900621, |
|
"grad_norm": 22.525750174655215, |
|
"learning_rate": 9.493048024473411e-08, |
|
"logits/chosen": -9.239489555358887, |
|
"logits/rejected": -8.590790748596191, |
|
"logps/chosen": -27.6241455078125, |
|
"logps/rejected": -43.37400817871094, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.07299178838729858, |
|
"rewards/margins": 0.4755212664604187, |
|
"rewards/rejected": -0.5485130548477173, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7292062286365363, |
|
"grad_norm": 21.336294616647535, |
|
"learning_rate": 9.384265768488224e-08, |
|
"logits/chosen": -9.427377700805664, |
|
"logits/rejected": -8.607305526733398, |
|
"logps/chosen": -27.55912208557129, |
|
"logps/rejected": -42.556678771972656, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.07397119700908661, |
|
"rewards/margins": 0.5928912162780762, |
|
"rewards/rejected": -0.666862428188324, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7697176857830105, |
|
"grad_norm": 20.933312614297105, |
|
"learning_rate": 9.265671017636383e-08, |
|
"logits/chosen": -9.61644458770752, |
|
"logits/rejected": -8.950950622558594, |
|
"logps/chosen": -28.78060531616211, |
|
"logps/rejected": -45.048828125, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.06900081783533096, |
|
"rewards/margins": 0.6354625821113586, |
|
"rewards/rejected": -0.7044633030891418, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8102291429294848, |
|
"grad_norm": 20.036505723069673, |
|
"learning_rate": 9.137529200713809e-08, |
|
"logits/chosen": -9.64765739440918, |
|
"logits/rejected": -8.868057250976562, |
|
"logps/chosen": -26.3610782623291, |
|
"logps/rejected": -43.371124267578125, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.8031250238418579, |
|
"rewards/chosen": -0.10404930263757706, |
|
"rewards/margins": 0.7085543870925903, |
|
"rewards/rejected": -0.812603771686554, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.850740600075959, |
|
"grad_norm": 21.18417249103817, |
|
"learning_rate": 9.000127113956672e-08, |
|
"logits/chosen": -9.870735168457031, |
|
"logits/rejected": -9.190484046936035, |
|
"logps/chosen": -26.790430068969727, |
|
"logps/rejected": -43.78266525268555, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.10198328644037247, |
|
"rewards/margins": 0.7101414799690247, |
|
"rewards/rejected": -0.8121248483657837, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.8912520572224332, |
|
"grad_norm": 18.99507288099157, |
|
"learning_rate": 8.853772279158165e-08, |
|
"logits/chosen": -9.834332466125488, |
|
"logits/rejected": -9.097946166992188, |
|
"logps/chosen": -27.970256805419922, |
|
"logps/rejected": -44.3878059387207, |
|
"loss": 0.4372, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.1515013873577118, |
|
"rewards/margins": 0.78831946849823, |
|
"rewards/rejected": -0.9398208856582642, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9317635143689075, |
|
"grad_norm": 22.643159035198256, |
|
"learning_rate": 8.698792255399104e-08, |
|
"logits/chosen": -10.250986099243164, |
|
"logits/rejected": -9.542135238647461, |
|
"logps/chosen": -28.57198715209961, |
|
"logps/rejected": -47.67482376098633, |
|
"loss": 0.4174, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.16280151903629303, |
|
"rewards/margins": 0.9155281186103821, |
|
"rewards/rejected": -1.0783296823501587, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.9722749715153817, |
|
"grad_norm": 17.009463712911476, |
|
"learning_rate": 8.535533905932736e-08, |
|
"logits/chosen": -10.252339363098145, |
|
"logits/rejected": -9.567429542541504, |
|
"logps/chosen": -29.68218421936035, |
|
"logps/rejected": -49.98845291137695, |
|
"loss": 0.4311, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.21189716458320618, |
|
"rewards/margins": 0.9156039953231812, |
|
"rewards/rejected": -1.1275012493133545, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.012786428661856, |
|
"grad_norm": 21.283801315155564, |
|
"learning_rate": 8.364362621864594e-08, |
|
"logits/chosen": -10.185906410217285, |
|
"logits/rejected": -9.48774528503418, |
|
"logps/chosen": -28.432941436767578, |
|
"logps/rejected": -50.117576599121094, |
|
"loss": 0.4013, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.21948130428791046, |
|
"rewards/margins": 0.9736725687980652, |
|
"rewards/rejected": -1.1931538581848145, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0532978858083302, |
|
"grad_norm": 22.754490059016796, |
|
"learning_rate": 8.185661504364844e-08, |
|
"logits/chosen": -10.365915298461914, |
|
"logits/rejected": -9.623150825500488, |
|
"logps/chosen": -28.134822845458984, |
|
"logps/rejected": -48.77445983886719, |
|
"loss": 0.3799, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.22865962982177734, |
|
"rewards/margins": 1.057023286819458, |
|
"rewards/rejected": -1.2856829166412354, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.0938093429548044, |
|
"grad_norm": 29.101808968686573, |
|
"learning_rate": 7.999830507243477e-08, |
|
"logits/chosen": -10.540546417236328, |
|
"logits/rejected": -9.749776840209961, |
|
"logps/chosen": -32.03081512451172, |
|
"logps/rejected": -50.996429443359375, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.30270159244537354, |
|
"rewards/margins": 1.1776001453399658, |
|
"rewards/rejected": -1.480301856994629, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1343208001012786, |
|
"grad_norm": 22.52674198964978, |
|
"learning_rate": 7.80728554180734e-08, |
|
"logits/chosen": -10.482057571411133, |
|
"logits/rejected": -9.802620887756348, |
|
"logps/chosen": -29.116191864013672, |
|
"logps/rejected": -51.501319885253906, |
|
"loss": 0.3695, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.2759430706501007, |
|
"rewards/margins": 1.228001356124878, |
|
"rewards/rejected": -1.5039442777633667, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.174832257247753, |
|
"grad_norm": 23.744363106936603, |
|
"learning_rate": 7.608457546002422e-08, |
|
"logits/chosen": -10.582446098327637, |
|
"logits/rejected": -9.793972969055176, |
|
"logps/chosen": -30.690120697021484, |
|
"logps/rejected": -52.65431594848633, |
|
"loss": 0.3608, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.35969388484954834, |
|
"rewards/margins": 1.3402436971664429, |
|
"rewards/rejected": -1.6999378204345703, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2153437143942272, |
|
"grad_norm": 16.96872058311971, |
|
"learning_rate": 7.403791519924793e-08, |
|
"logits/chosen": -10.636259078979492, |
|
"logits/rejected": -9.861931800842285, |
|
"logps/chosen": -30.6519718170166, |
|
"logps/rejected": -55.83306884765625, |
|
"loss": 0.3609, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.34394341707229614, |
|
"rewards/margins": 1.4830596446990967, |
|
"rewards/rejected": -1.8270031213760376, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2558551715407014, |
|
"grad_norm": 23.80487547845051, |
|
"learning_rate": 7.193745529858826e-08, |
|
"logits/chosen": -10.673724174499512, |
|
"logits/rejected": -9.968966484069824, |
|
"logps/chosen": -32.3521614074707, |
|
"logps/rejected": -55.50031661987305, |
|
"loss": 0.3796, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.4293053150177002, |
|
"rewards/margins": 1.2904064655303955, |
|
"rewards/rejected": -1.7197116613388062, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.2963666286871756, |
|
"grad_norm": 26.02880426926903, |
|
"learning_rate": 6.978789683071759e-08, |
|
"logits/chosen": -10.829614639282227, |
|
"logits/rejected": -10.158222198486328, |
|
"logps/chosen": -30.30867576599121, |
|
"logps/rejected": -54.475677490234375, |
|
"loss": 0.3895, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.41875213384628296, |
|
"rewards/margins": 1.3132567405700684, |
|
"rewards/rejected": -1.732008695602417, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.3368780858336498, |
|
"grad_norm": 23.11525326850835, |
|
"learning_rate": 6.759405075659165e-08, |
|
"logits/chosen": -10.891769409179688, |
|
"logits/rejected": -10.190674781799316, |
|
"logps/chosen": -30.676244735717773, |
|
"logps/rejected": -58.19831466674805, |
|
"loss": 0.3621, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.4654254913330078, |
|
"rewards/margins": 1.587354302406311, |
|
"rewards/rejected": -2.0527796745300293, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.377389542980124, |
|
"grad_norm": 19.61508445315914, |
|
"learning_rate": 6.536082715796124e-08, |
|
"logits/chosen": -11.017595291137695, |
|
"logits/rejected": -10.286384582519531, |
|
"logps/chosen": -29.53006362915039, |
|
"logps/rejected": -54.037132263183594, |
|
"loss": 0.3455, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.3892853260040283, |
|
"rewards/margins": 1.4755113124847412, |
|
"rewards/rejected": -1.8647968769073486, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4179010001265984, |
|
"grad_norm": 24.546782869534564, |
|
"learning_rate": 6.309322424804034e-08, |
|
"logits/chosen": -10.974300384521484, |
|
"logits/rejected": -10.224813461303711, |
|
"logps/chosen": -33.27606201171875, |
|
"logps/rejected": -58.134117126464844, |
|
"loss": 0.3673, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5354558229446411, |
|
"rewards/margins": 1.5486235618591309, |
|
"rewards/rejected": -2.0840792655944824, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.4584124572730726, |
|
"grad_norm": 20.799419761461635, |
|
"learning_rate": 6.079631718492568e-08, |
|
"logits/chosen": -11.121304512023926, |
|
"logits/rejected": -10.479646682739258, |
|
"logps/chosen": -30.873401641845703, |
|
"logps/rejected": -58.193626403808594, |
|
"loss": 0.3602, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4879949986934662, |
|
"rewards/margins": 1.5607774257659912, |
|
"rewards/rejected": -2.0487725734710693, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.4989239144195468, |
|
"grad_norm": 22.15106589586126, |
|
"learning_rate": 5.847524671280484e-08, |
|
"logits/chosen": -11.006221771240234, |
|
"logits/rejected": -10.338632583618164, |
|
"logps/chosen": -31.7496395111084, |
|
"logps/rejected": -56.75434112548828, |
|
"loss": 0.355, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.5273393392562866, |
|
"rewards/margins": 1.4817734956741333, |
|
"rewards/rejected": -2.00911283493042, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.5394353715660212, |
|
"grad_norm": 27.06131590311837, |
|
"learning_rate": 5.6135207656374886e-08, |
|
"logits/chosen": -11.114490509033203, |
|
"logits/rejected": -10.422750473022461, |
|
"logps/chosen": -31.69234848022461, |
|
"logps/rejected": -55.545692443847656, |
|
"loss": 0.3597, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.5423402190208435, |
|
"rewards/margins": 1.4755096435546875, |
|
"rewards/rejected": -2.017849922180176, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.5799468287124951, |
|
"grad_norm": 25.089579946286218, |
|
"learning_rate": 5.3781437294222844e-08, |
|
"logits/chosen": -11.122465133666992, |
|
"logits/rejected": -10.4068603515625, |
|
"logps/chosen": -32.763519287109375, |
|
"logps/rejected": -58.76951217651367, |
|
"loss": 0.3421, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5654906034469604, |
|
"rewards/margins": 1.569490671157837, |
|
"rewards/rejected": -2.134981155395508, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6204582858589696, |
|
"grad_norm": 25.42503001610947, |
|
"learning_rate": 5.141920363718916e-08, |
|
"logits/chosen": -11.278289794921875, |
|
"logits/rejected": -10.557046890258789, |
|
"logps/chosen": -33.54096221923828, |
|
"logps/rejected": -57.26496505737305, |
|
"loss": 0.3619, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.638907253742218, |
|
"rewards/margins": 1.6008431911468506, |
|
"rewards/rejected": -2.239750385284424, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.6609697430054438, |
|
"grad_norm": 19.761132187041504, |
|
"learning_rate": 4.905379363794906e-08, |
|
"logits/chosen": -11.1757230758667, |
|
"logits/rejected": -10.361800193786621, |
|
"logps/chosen": -31.939849853515625, |
|
"logps/rejected": -58.90076446533203, |
|
"loss": 0.3557, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5620619058609009, |
|
"rewards/margins": 1.6297252178192139, |
|
"rewards/rejected": -2.1917872428894043, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.701481200151918, |
|
"grad_norm": 21.623030847191835, |
|
"learning_rate": 4.669050135819966e-08, |
|
"logits/chosen": -11.290115356445312, |
|
"logits/rejected": -10.585660934448242, |
|
"logps/chosen": -31.6041316986084, |
|
"logps/rejected": -54.78083419799805, |
|
"loss": 0.3602, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.6143931150436401, |
|
"rewards/margins": 1.5332918167114258, |
|
"rewards/rejected": -2.1476848125457764, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.7419926572983924, |
|
"grad_norm": 24.475959531037525, |
|
"learning_rate": 4.4334616119936507e-08, |
|
"logits/chosen": -11.401643753051758, |
|
"logits/rejected": -10.660685539245605, |
|
"logps/chosen": -31.317325592041016, |
|
"logps/rejected": -57.25482940673828, |
|
"loss": 0.3347, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.557376503944397, |
|
"rewards/margins": 1.634526014328003, |
|
"rewards/rejected": -2.1919026374816895, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.7825041144448663, |
|
"grad_norm": 19.486205802493238, |
|
"learning_rate": 4.199141066733789e-08, |
|
"logits/chosen": -11.200121879577637, |
|
"logits/rejected": -10.525270462036133, |
|
"logps/chosen": -29.716289520263672, |
|
"logps/rejected": -55.837684631347656, |
|
"loss": 0.3402, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5918062925338745, |
|
"rewards/margins": 1.5346314907073975, |
|
"rewards/rejected": -2.1264379024505615, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.8230155715913408, |
|
"grad_norm": 33.35679620452287, |
|
"learning_rate": 3.966612936575235e-08, |
|
"logits/chosen": -11.320077896118164, |
|
"logits/rejected": -10.705958366394043, |
|
"logps/chosen": -31.9770450592041, |
|
"logps/rejected": -57.85862350463867, |
|
"loss": 0.3475, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.6092299818992615, |
|
"rewards/margins": 1.5435864925384521, |
|
"rewards/rejected": -2.1528162956237793, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.863527028737815, |
|
"grad_norm": 31.567941489065486, |
|
"learning_rate": 3.736397646420134e-08, |
|
"logits/chosen": -11.576532363891602, |
|
"logits/rejected": -10.939656257629395, |
|
"logps/chosen": -33.84613037109375, |
|
"logps/rejected": -59.848854064941406, |
|
"loss": 0.3416, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.6238502860069275, |
|
"rewards/margins": 1.6080996990203857, |
|
"rewards/rejected": -2.231950283050537, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.9040384858842891, |
|
"grad_norm": 26.954070410698666, |
|
"learning_rate": 3.509010444766674e-08, |
|
"logits/chosen": -11.481412887573242, |
|
"logits/rejected": -10.886090278625488, |
|
"logps/chosen": -33.668968200683594, |
|
"logps/rejected": -61.31293869018555, |
|
"loss": 0.3405, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.6743445992469788, |
|
"rewards/margins": 1.5814064741134644, |
|
"rewards/rejected": -2.255751132965088, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.9445499430307633, |
|
"grad_norm": 26.90008640112988, |
|
"learning_rate": 3.284960250523237e-08, |
|
"logits/chosen": -11.358150482177734, |
|
"logits/rejected": -10.692336082458496, |
|
"logps/chosen": -32.4575309753418, |
|
"logps/rejected": -58.795753479003906, |
|
"loss": 0.3619, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.6383572816848755, |
|
"rewards/margins": 1.5735241174697876, |
|
"rewards/rejected": -2.211881160736084, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.9850614001772375, |
|
"grad_norm": 22.343884141895376, |
|
"learning_rate": 3.064748513988914e-08, |
|
"logits/chosen": -11.427927017211914, |
|
"logits/rejected": -10.837053298950195, |
|
"logps/chosen": -32.22171401977539, |
|
"logps/rejected": -59.157142639160156, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.6551386117935181, |
|
"rewards/margins": 1.6512985229492188, |
|
"rewards/rejected": -2.3064370155334473, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.025572857323712, |
|
"grad_norm": 21.328409409312936, |
|
"learning_rate": 2.8488680945496145e-08, |
|
"logits/chosen": -11.407200813293457, |
|
"logits/rejected": -10.8077974319458, |
|
"logps/chosen": -32.2757453918457, |
|
"logps/rejected": -58.08913040161133, |
|
"loss": 0.3406, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6142057776451111, |
|
"rewards/margins": 1.6251834630966187, |
|
"rewards/rejected": -2.239389419555664, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.066084314470186, |
|
"grad_norm": 21.92232136811071, |
|
"learning_rate": 2.6378021576016464e-08, |
|
"logits/chosen": -11.57470703125, |
|
"logits/rejected": -10.841165542602539, |
|
"logps/chosen": -33.64252471923828, |
|
"logps/rejected": -60.10270309448242, |
|
"loss": 0.3445, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6759551763534546, |
|
"rewards/margins": 1.7165590524673462, |
|
"rewards/rejected": -2.3925139904022217, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.1065957716166603, |
|
"grad_norm": 23.376464913490395, |
|
"learning_rate": 2.4320230931715697e-08, |
|
"logits/chosen": -11.309526443481445, |
|
"logits/rejected": -10.582537651062012, |
|
"logps/chosen": -33.03705596923828, |
|
"logps/rejected": -59.86212158203125, |
|
"loss": 0.3228, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.7096266746520996, |
|
"rewards/margins": 1.673158884048462, |
|
"rewards/rejected": -2.3827853202819824, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.1471072287631348, |
|
"grad_norm": 18.289595804333104, |
|
"learning_rate": 2.2319914586525774e-08, |
|
"logits/chosen": -11.554033279418945, |
|
"logits/rejected": -10.901694297790527, |
|
"logps/chosen": -34.2962646484375, |
|
"logps/rejected": -61.11516189575195, |
|
"loss": 0.3355, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.6974956393241882, |
|
"rewards/margins": 1.66302490234375, |
|
"rewards/rejected": -2.360520601272583, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.1876186859096087, |
|
"grad_norm": 23.20641172076809, |
|
"learning_rate": 2.038154948023668e-08, |
|
"logits/chosen": -11.543357849121094, |
|
"logits/rejected": -10.89212703704834, |
|
"logps/chosen": -35.47829055786133, |
|
"logps/rejected": -60.59331130981445, |
|
"loss": 0.3243, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.7092735171318054, |
|
"rewards/margins": 1.7081466913223267, |
|
"rewards/rejected": -2.4174201488494873, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.228130143056083, |
|
"grad_norm": 25.247728097262844, |
|
"learning_rate": 1.8509473898586432e-08, |
|
"logits/chosen": -11.410323143005371, |
|
"logits/rejected": -10.694147109985352, |
|
"logps/chosen": -32.15620803833008, |
|
"logps/rejected": -62.000328063964844, |
|
"loss": 0.2968, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6627341508865356, |
|
"rewards/margins": 1.8608602285385132, |
|
"rewards/rejected": -2.523594617843628, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.268641600202557, |
|
"grad_norm": 24.11504934805817, |
|
"learning_rate": 1.6707877763674887e-08, |
|
"logits/chosen": -11.610152244567871, |
|
"logits/rejected": -11.053690910339355, |
|
"logps/chosen": -33.62152862548828, |
|
"logps/rejected": -62.07065963745117, |
|
"loss": 0.3314, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.7055118083953857, |
|
"rewards/margins": 1.7053735256195068, |
|
"rewards/rejected": -2.4108853340148926, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.3091530573490315, |
|
"grad_norm": 24.563417003466505, |
|
"learning_rate": 1.4980793256432472e-08, |
|
"logits/chosen": -11.563300132751465, |
|
"logits/rejected": -10.760797500610352, |
|
"logps/chosen": -34.131736755371094, |
|
"logps/rejected": -62.216285705566406, |
|
"loss": 0.3008, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.7754543423652649, |
|
"rewards/margins": 1.8263394832611084, |
|
"rewards/rejected": -2.6017937660217285, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.349664514495506, |
|
"grad_norm": 32.15938788887828, |
|
"learning_rate": 1.3332085792131965e-08, |
|
"logits/chosen": -11.64229679107666, |
|
"logits/rejected": -10.900683403015137, |
|
"logps/chosen": -35.07176971435547, |
|
"logps/rejected": -61.276954650878906, |
|
"loss": 0.3318, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.7460302114486694, |
|
"rewards/margins": 1.772905707359314, |
|
"rewards/rejected": -2.5189361572265625, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.39017597164198, |
|
"grad_norm": 30.52452011331439, |
|
"learning_rate": 1.1765445369141274e-08, |
|
"logits/chosen": -11.567706108093262, |
|
"logits/rejected": -10.895853042602539, |
|
"logps/chosen": -33.69668960571289, |
|
"logps/rejected": -62.25682830810547, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.7219974398612976, |
|
"rewards/margins": 1.7591053247451782, |
|
"rewards/rejected": -2.481102466583252, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.4306874287884543, |
|
"grad_norm": 19.90507599157542, |
|
"learning_rate": 1.0284378310279368e-08, |
|
"logits/chosen": -11.508050918579102, |
|
"logits/rejected": -10.74337387084961, |
|
"logps/chosen": -32.797637939453125, |
|
"logps/rejected": -63.94108200073242, |
|
"loss": 0.303, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.6646064519882202, |
|
"rewards/margins": 2.0007808208465576, |
|
"rewards/rejected": -2.6653873920440674, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.4711988859349283, |
|
"grad_norm": 21.194773980773512, |
|
"learning_rate": 8.892199415259499e-09, |
|
"logits/chosen": -11.653422355651855, |
|
"logits/rejected": -11.00390911102295, |
|
"logps/chosen": -35.742061614990234, |
|
"logps/rejected": -65.51417541503906, |
|
"loss": 0.3164, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.7325908541679382, |
|
"rewards/margins": 1.797716498374939, |
|
"rewards/rejected": -2.5303072929382324, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.5117103430814027, |
|
"grad_norm": 32.194708817268754, |
|
"learning_rate": 7.592024541783343e-09, |
|
"logits/chosen": -11.667333602905273, |
|
"logits/rejected": -10.975776672363281, |
|
"logps/chosen": -34.44770812988281, |
|
"logps/rejected": -59.715660095214844, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.762362003326416, |
|
"rewards/margins": 1.7372121810913086, |
|
"rewards/rejected": -2.4995741844177246, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.552221800227877, |
|
"grad_norm": 22.079251389305, |
|
"learning_rate": 6.386763631890313e-09, |
|
"logits/chosen": -11.638243675231934, |
|
"logits/rejected": -10.932289123535156, |
|
"logps/chosen": -32.0985107421875, |
|
"logps/rejected": -59.693931579589844, |
|
"loss": 0.3138, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.6655905842781067, |
|
"rewards/margins": 1.7755588293075562, |
|
"rewards/rejected": -2.4411492347717285, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.592733257374351, |
|
"grad_norm": 27.178193998967842, |
|
"learning_rate": 5.279114199170093e-09, |
|
"logits/chosen": -11.623285293579102, |
|
"logits/rejected": -10.849055290222168, |
|
"logps/chosen": -34.02157211303711, |
|
"logps/rejected": -63.82293701171875, |
|
"loss": 0.2965, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.7236124873161316, |
|
"rewards/margins": 1.9638967514038086, |
|
"rewards/rejected": -2.687509059906006, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.6332447145208255, |
|
"grad_norm": 23.237750807734354, |
|
"learning_rate": 4.271555291414636e-09, |
|
"logits/chosen": -11.59123420715332, |
|
"logits/rejected": -10.88310432434082, |
|
"logps/chosen": -35.09485626220703, |
|
"logps/rejected": -63.60078811645508, |
|
"loss": 0.3114, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.6854656934738159, |
|
"rewards/margins": 1.8889812231063843, |
|
"rewards/rejected": -2.5744469165802, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.6737561716672995, |
|
"grad_norm": 23.37299143104811, |
|
"learning_rate": 3.3663419422218677e-09, |
|
"logits/chosen": -11.671598434448242, |
|
"logits/rejected": -10.90255355834961, |
|
"logps/chosen": -34.43938446044922, |
|
"logps/rejected": -63.91756057739258, |
|
"loss": 0.295, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.690475344657898, |
|
"rewards/margins": 2.0302391052246094, |
|
"rewards/rejected": -2.7207143306732178, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.714267628813774, |
|
"grad_norm": 21.948999572300334, |
|
"learning_rate": 2.565500123969183e-09, |
|
"logits/chosen": -11.691490173339844, |
|
"logits/rejected": -11.033750534057617, |
|
"logps/chosen": -34.22254180908203, |
|
"logps/rejected": -60.50248336791992, |
|
"loss": 0.3374, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.8206373453140259, |
|
"rewards/margins": 1.7287733554840088, |
|
"rewards/rejected": -2.549410343170166, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.754779085960248, |
|
"grad_norm": 29.12118466657181, |
|
"learning_rate": 1.8708222134525163e-09, |
|
"logits/chosen": -11.480539321899414, |
|
"logits/rejected": -10.761662483215332, |
|
"logps/chosen": -34.17140197753906, |
|
"logps/rejected": -62.354156494140625, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.7504395246505737, |
|
"rewards/margins": 1.792453408241272, |
|
"rewards/rejected": -2.5428929328918457, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.7952905431067223, |
|
"grad_norm": 28.127183360242164, |
|
"learning_rate": 1.283862980339334e-09, |
|
"logits/chosen": -11.640631675720215, |
|
"logits/rejected": -11.045839309692383, |
|
"logps/chosen": -36.884315490722656, |
|
"logps/rejected": -64.67533874511719, |
|
"loss": 0.3354, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.8064830899238586, |
|
"rewards/margins": 1.7638835906982422, |
|
"rewards/rejected": -2.570366859436035, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.8358020002531967, |
|
"grad_norm": 27.365840296062363, |
|
"learning_rate": 8.059361074139293e-10, |
|
"logits/chosen": -11.53732967376709, |
|
"logits/rejected": -10.982316970825195, |
|
"logps/chosen": -34.8204231262207, |
|
"logps/rejected": -60.4800910949707, |
|
"loss": 0.3765, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.77055424451828, |
|
"rewards/margins": 1.6154954433441162, |
|
"rewards/rejected": -2.386049747467041, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.8763134573996707, |
|
"grad_norm": 23.555963752594412, |
|
"learning_rate": 4.381112504031337e-10, |
|
"logits/chosen": -11.630104064941406, |
|
"logits/rejected": -10.956541061401367, |
|
"logps/chosen": -34.54252243041992, |
|
"logps/rejected": -60.827423095703125, |
|
"loss": 0.3183, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.7591537833213806, |
|
"rewards/margins": 1.6892755031585693, |
|
"rewards/rejected": -2.4484293460845947, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.916824914546145, |
|
"grad_norm": 21.502729719070256, |
|
"learning_rate": 1.8121164396283638e-10, |
|
"logits/chosen": -11.620569229125977, |
|
"logits/rejected": -10.932313919067383, |
|
"logps/chosen": -36.305519104003906, |
|
"logps/rejected": -66.49055480957031, |
|
"loss": 0.3241, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.7203236222267151, |
|
"rewards/margins": 1.99014413356781, |
|
"rewards/rejected": -2.71046781539917, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.9573363716926195, |
|
"grad_norm": 22.58072518400253, |
|
"learning_rate": 3.581225918342645e-11, |
|
"logits/chosen": -11.6244535446167, |
|
"logits/rejected": -10.896235466003418, |
|
"logps/chosen": -31.406940460205078, |
|
"logps/rejected": -62.654396057128906, |
|
"loss": 0.3115, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.7061868906021118, |
|
"rewards/margins": 1.8940362930297852, |
|
"rewards/rejected": -2.6002233028411865, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.9897455374097985, |
|
"step": 738, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4158425409457871, |
|
"train_runtime": 20368.7971, |
|
"train_samples_per_second": 2.327, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 738, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|