|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998751404669747, |
|
"eval_steps": 1000, |
|
"global_step": 4004, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000249719066050693, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 1.2468827930174565e-08, |
|
"logits/chosen": -2.450503349304199, |
|
"logits/rejected": -2.672837734222412, |
|
"logps/chosen": -21.34674835205078, |
|
"logps/rejected": -42.586097717285156, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00249719066050693, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 1.2468827930174566e-07, |
|
"logits/chosen": -2.275761604309082, |
|
"logits/rejected": -2.479705333709717, |
|
"logps/chosen": -22.14301300048828, |
|
"logps/rejected": -63.31869888305664, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00027842415147460997, |
|
"rewards/margins": -0.00017310140538029373, |
|
"rewards/rejected": -0.0001053227242664434, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00499438132101386, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 2.493765586034913e-07, |
|
"logits/chosen": -2.2202348709106445, |
|
"logits/rejected": -2.429389238357544, |
|
"logps/chosen": -21.814502716064453, |
|
"logps/rejected": -61.35728073120117, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 8.430716843577102e-05, |
|
"rewards/margins": 0.00037039705784991384, |
|
"rewards/rejected": -0.00028608986758627, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007491571981520789, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 3.7406483790523695e-07, |
|
"logits/chosen": -2.14150333404541, |
|
"logits/rejected": -2.3708083629608154, |
|
"logps/chosen": -22.1105899810791, |
|
"logps/rejected": -52.95900344848633, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -9.514805424259976e-05, |
|
"rewards/margins": -5.593679452431388e-05, |
|
"rewards/rejected": -3.9211259718285874e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00998876264202772, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 4.987531172069826e-07, |
|
"logits/chosen": -2.1455249786376953, |
|
"logits/rejected": -2.362419605255127, |
|
"logps/chosen": -22.628782272338867, |
|
"logps/rejected": -63.2244873046875, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0004498485941439867, |
|
"rewards/margins": 0.0016190257156267762, |
|
"rewards/rejected": -0.0011691770050674677, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012485953302534648, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 6.234413965087283e-07, |
|
"logits/chosen": -2.2349250316619873, |
|
"logits/rejected": -2.495819568634033, |
|
"logps/chosen": -22.863269805908203, |
|
"logps/rejected": -59.4576416015625, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0006220145733095706, |
|
"rewards/margins": 0.0015806708252057433, |
|
"rewards/rejected": -0.0009586562518961728, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014983143963041578, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 7.481296758104739e-07, |
|
"logits/chosen": -2.169523239135742, |
|
"logits/rejected": -2.3751749992370605, |
|
"logps/chosen": -22.777694702148438, |
|
"logps/rejected": -68.83964538574219, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0019476842135190964, |
|
"rewards/margins": 0.004409968852996826, |
|
"rewards/rejected": -0.002462285105139017, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.017480334623548508, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 8.728179551122195e-07, |
|
"logits/chosen": -2.286738634109497, |
|
"logits/rejected": -2.4896113872528076, |
|
"logps/chosen": -21.078710556030273, |
|
"logps/rejected": -50.04187774658203, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.004810997284948826, |
|
"rewards/margins": 0.007176141254603863, |
|
"rewards/rejected": -0.0023651437368243933, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01997752528405544, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 9.975062344139653e-07, |
|
"logits/chosen": -2.144176959991455, |
|
"logits/rejected": -2.352398633956909, |
|
"logps/chosen": -21.391971588134766, |
|
"logps/rejected": -56.86810302734375, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010686805471777916, |
|
"rewards/margins": 0.01409011147916317, |
|
"rewards/rejected": -0.003403306705877185, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02247471594456237, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 1.1221945137157108e-06, |
|
"logits/chosen": -2.2526628971099854, |
|
"logits/rejected": -2.430774211883545, |
|
"logps/chosen": -19.845823287963867, |
|
"logps/rejected": -51.37982177734375, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.019541995599865913, |
|
"rewards/margins": 0.021860197186470032, |
|
"rewards/rejected": -0.0023182008881121874, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.024971906605069295, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.2468827930174565e-06, |
|
"logits/chosen": -2.1313042640686035, |
|
"logits/rejected": -2.3720927238464355, |
|
"logps/chosen": -20.160160064697266, |
|
"logps/rejected": -66.42484283447266, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.031359124928712845, |
|
"rewards/margins": 0.03116660751402378, |
|
"rewards/rejected": 0.00019251916091889143, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027469097265576226, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.3715710723192023e-06, |
|
"logits/chosen": -2.1676554679870605, |
|
"logits/rejected": -2.389533758163452, |
|
"logps/chosen": -17.833478927612305, |
|
"logps/rejected": -60.63257598876953, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04782567545771599, |
|
"rewards/margins": 0.05032258480787277, |
|
"rewards/rejected": -0.002496910747140646, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.029966287926083156, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.4962593516209478e-06, |
|
"logits/chosen": -2.1279516220092773, |
|
"logits/rejected": -2.343705177307129, |
|
"logps/chosen": -15.757919311523438, |
|
"logps/rejected": -51.14020919799805, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06653784960508347, |
|
"rewards/margins": 0.06694493442773819, |
|
"rewards/rejected": -0.0004070843569934368, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.032463478586590086, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.6209476309226935e-06, |
|
"logits/chosen": -2.3082690238952637, |
|
"logits/rejected": -2.5344271659851074, |
|
"logps/chosen": -12.95374870300293, |
|
"logps/rejected": -53.89298629760742, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09254685044288635, |
|
"rewards/margins": 0.09660454094409943, |
|
"rewards/rejected": -0.0040576886385679245, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.034960669247097016, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 1.745635910224439e-06, |
|
"logits/chosen": -2.1814446449279785, |
|
"logits/rejected": -2.40262508392334, |
|
"logps/chosen": -11.56260871887207, |
|
"logps/rejected": -71.49890899658203, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11319079250097275, |
|
"rewards/margins": 0.11961270868778229, |
|
"rewards/rejected": -0.006421914789825678, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.037457859907603946, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 1.8703241895261848e-06, |
|
"logits/chosen": -2.2549407482147217, |
|
"logits/rejected": -2.4583637714385986, |
|
"logps/chosen": -8.707418441772461, |
|
"logps/rejected": -56.646148681640625, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13278979063034058, |
|
"rewards/margins": 0.14516989886760712, |
|
"rewards/rejected": -0.012380105443298817, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03995505056811088, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 1.9950124688279305e-06, |
|
"logits/chosen": -2.261176586151123, |
|
"logits/rejected": -2.454853057861328, |
|
"logps/chosen": -7.25634765625, |
|
"logps/rejected": -62.16912841796875, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14732162654399872, |
|
"rewards/margins": 0.1813906729221344, |
|
"rewards/rejected": -0.034069035202264786, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04245224122861781, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 2.119700748129676e-06, |
|
"logits/chosen": -2.413883686065674, |
|
"logits/rejected": -2.6421730518341064, |
|
"logps/chosen": -5.5545244216918945, |
|
"logps/rejected": -54.24146270751953, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16115576028823853, |
|
"rewards/margins": 0.21780212223529816, |
|
"rewards/rejected": -0.05664635822176933, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04494943188912474, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 2.2443890274314216e-06, |
|
"logits/chosen": -2.123264789581299, |
|
"logits/rejected": -2.3629353046417236, |
|
"logps/chosen": -5.675574779510498, |
|
"logps/rejected": -81.35579681396484, |
|
"loss": 0.448, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16601073741912842, |
|
"rewards/margins": 0.2519921362400055, |
|
"rewards/rejected": -0.08598136156797409, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04744662254963167, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 2.3690773067331675e-06, |
|
"logits/chosen": -2.162355899810791, |
|
"logits/rejected": -2.4037208557128906, |
|
"logps/chosen": -4.741239547729492, |
|
"logps/rejected": -69.67314147949219, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17391221225261688, |
|
"rewards/margins": 0.32386231422424316, |
|
"rewards/rejected": -0.14995010197162628, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04994381321013859, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 2.493765586034913e-06, |
|
"logits/chosen": -2.232464551925659, |
|
"logits/rejected": -2.461862087249756, |
|
"logps/chosen": -4.306845664978027, |
|
"logps/rejected": -70.49752807617188, |
|
"loss": 0.429, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18084710836410522, |
|
"rewards/margins": 0.39341551065444946, |
|
"rewards/rejected": -0.21256835758686066, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05244100387064552, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 2.6184538653366586e-06, |
|
"logits/chosen": -2.2186341285705566, |
|
"logits/rejected": -2.4293782711029053, |
|
"logps/chosen": -2.813771963119507, |
|
"logps/rejected": -77.77786254882812, |
|
"loss": 0.4186, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18635782599449158, |
|
"rewards/margins": 0.4745180010795593, |
|
"rewards/rejected": -0.28816017508506775, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05493819453115245, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 2.7431421446384045e-06, |
|
"logits/chosen": -2.2114510536193848, |
|
"logits/rejected": -2.423021078109741, |
|
"logps/chosen": -2.7164266109466553, |
|
"logps/rejected": -93.01399230957031, |
|
"loss": 0.4086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19216802716255188, |
|
"rewards/margins": 0.5540723204612732, |
|
"rewards/rejected": -0.3619043231010437, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05743538519165938, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 2.86783042394015e-06, |
|
"logits/chosen": -2.2182869911193848, |
|
"logits/rejected": -2.4157519340515137, |
|
"logps/chosen": -2.1753125190734863, |
|
"logps/rejected": -96.47676086425781, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19363494217395782, |
|
"rewards/margins": 0.6491508483886719, |
|
"rewards/rejected": -0.45551595091819763, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05993257585216631, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 2.9925187032418956e-06, |
|
"logits/chosen": -2.303800344467163, |
|
"logits/rejected": -2.5223240852355957, |
|
"logps/chosen": -2.2545647621154785, |
|
"logps/rejected": -115.70625305175781, |
|
"loss": 0.3757, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19811145961284637, |
|
"rewards/margins": 0.8461275100708008, |
|
"rewards/rejected": -0.6480159759521484, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06242976651267324, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 3.117206982543641e-06, |
|
"logits/chosen": -2.2053685188293457, |
|
"logits/rejected": -2.415367841720581, |
|
"logps/chosen": -2.1990444660186768, |
|
"logps/rejected": -140.34054565429688, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19701093435287476, |
|
"rewards/margins": 1.0766099691390991, |
|
"rewards/rejected": -0.8795989751815796, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06492695717318017, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 3.241895261845387e-06, |
|
"logits/chosen": -2.224290132522583, |
|
"logits/rejected": -2.4391043186187744, |
|
"logps/chosen": -1.894426941871643, |
|
"logps/rejected": -191.0155029296875, |
|
"loss": 0.3217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19716738164424896, |
|
"rewards/margins": 1.5208184719085693, |
|
"rewards/rejected": -1.3236511945724487, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0674241478336871, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 3.3665835411471326e-06, |
|
"logits/chosen": -2.1803958415985107, |
|
"logits/rejected": -2.3852007389068604, |
|
"logps/chosen": -2.2776474952697754, |
|
"logps/rejected": -256.2982177734375, |
|
"loss": 0.2905, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19975684583187103, |
|
"rewards/margins": 2.1906659603118896, |
|
"rewards/rejected": -1.990909218788147, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06992133849419403, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 3.491271820448878e-06, |
|
"logits/chosen": -2.089259624481201, |
|
"logits/rejected": -2.2738101482391357, |
|
"logps/chosen": -3.7932281494140625, |
|
"logps/rejected": -315.3883361816406, |
|
"loss": 0.2858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19718244671821594, |
|
"rewards/margins": 2.653756856918335, |
|
"rewards/rejected": -2.4565746784210205, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07241852915470096, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 3.615960099750624e-06, |
|
"logits/chosen": -2.136627674102783, |
|
"logits/rejected": -2.336648941040039, |
|
"logps/chosen": -2.27809476852417, |
|
"logps/rejected": -309.0271911621094, |
|
"loss": 0.281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19811423122882843, |
|
"rewards/margins": 2.7192320823669434, |
|
"rewards/rejected": -2.521117687225342, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07491571981520789, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 3.7406483790523696e-06, |
|
"logits/chosen": -2.1298162937164307, |
|
"logits/rejected": -2.3403031826019287, |
|
"logps/chosen": -2.7181735038757324, |
|
"logps/rejected": -379.2640075683594, |
|
"loss": 0.2649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1967845857143402, |
|
"rewards/margins": 3.449932813644409, |
|
"rewards/rejected": -3.253148317337036, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07741291047571482, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 3.8653366583541155e-06, |
|
"logits/chosen": -2.0690829753875732, |
|
"logits/rejected": -2.240788459777832, |
|
"logps/chosen": -2.222135066986084, |
|
"logps/rejected": -404.05157470703125, |
|
"loss": 0.2741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19702208042144775, |
|
"rewards/margins": 3.7276394367218018, |
|
"rewards/rejected": -3.5306174755096436, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07991010113622175, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 3.990024937655861e-06, |
|
"logits/chosen": -2.0671050548553467, |
|
"logits/rejected": -2.24275279045105, |
|
"logps/chosen": -2.2376856803894043, |
|
"logps/rejected": -507.495849609375, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1967649608850479, |
|
"rewards/margins": 4.618912696838379, |
|
"rewards/rejected": -4.422147750854492, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08240729179672868, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 4.114713216957607e-06, |
|
"logits/chosen": -2.137000560760498, |
|
"logits/rejected": -2.287095546722412, |
|
"logps/chosen": -2.6727747917175293, |
|
"logps/rejected": -397.1515808105469, |
|
"loss": 0.2652, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1976507008075714, |
|
"rewards/margins": 3.678623914718628, |
|
"rewards/rejected": -3.480973482131958, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08490448245723561, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 4.239401496259352e-06, |
|
"logits/chosen": -2.0656325817108154, |
|
"logits/rejected": -2.2314834594726562, |
|
"logps/chosen": -2.123012065887451, |
|
"logps/rejected": -494.6885681152344, |
|
"loss": 0.2573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19615662097930908, |
|
"rewards/margins": 4.528371810913086, |
|
"rewards/rejected": -4.332215309143066, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08740167311774254, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.364089775561098e-06, |
|
"logits/chosen": -2.1637234687805176, |
|
"logits/rejected": -2.3083388805389404, |
|
"logps/chosen": -2.9447762966156006, |
|
"logps/rejected": -453.163330078125, |
|
"loss": 0.264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18918542563915253, |
|
"rewards/margins": 4.223211288452148, |
|
"rewards/rejected": -4.034026145935059, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08989886377824947, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 4.488778054862843e-06, |
|
"logits/chosen": -2.1501951217651367, |
|
"logits/rejected": -2.341325521469116, |
|
"logps/chosen": -4.00003719329834, |
|
"logps/rejected": -510.6114196777344, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18887588381767273, |
|
"rewards/margins": 4.7241339683532715, |
|
"rewards/rejected": -4.5352582931518555, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.0923960544387564, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 4.6134663341645895e-06, |
|
"logits/chosen": -2.152017593383789, |
|
"logits/rejected": -2.326498508453369, |
|
"logps/chosen": -3.2789077758789062, |
|
"logps/rejected": -488.865966796875, |
|
"loss": 0.2472, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19166019558906555, |
|
"rewards/margins": 4.529562473297119, |
|
"rewards/rejected": -4.337902069091797, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09489324509926333, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 4.738154613466335e-06, |
|
"logits/chosen": -2.0966598987579346, |
|
"logits/rejected": -2.3076987266540527, |
|
"logps/chosen": -3.7783362865448, |
|
"logps/rejected": -743.3594970703125, |
|
"loss": 0.2398, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18687334656715393, |
|
"rewards/margins": 6.980570316314697, |
|
"rewards/rejected": -6.793696403503418, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09739043575977026, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 4.862842892768081e-06, |
|
"logits/chosen": -2.1418652534484863, |
|
"logits/rejected": -2.30336332321167, |
|
"logps/chosen": -2.9560298919677734, |
|
"logps/rejected": -607.9320068359375, |
|
"loss": 0.2388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18996365368366241, |
|
"rewards/margins": 5.754693031311035, |
|
"rewards/rejected": -5.564728736877441, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09988762642027718, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 4.987531172069826e-06, |
|
"logits/chosen": -2.0703442096710205, |
|
"logits/rejected": -2.2270889282226562, |
|
"logps/chosen": -2.578680992126465, |
|
"logps/rejected": -683.11083984375, |
|
"loss": 0.2415, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19460181891918182, |
|
"rewards/margins": 6.500932216644287, |
|
"rewards/rejected": -6.30633020401001, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10238481708078412, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 4.999923022460671e-06, |
|
"logits/chosen": -2.0380523204803467, |
|
"logits/rejected": -2.2315127849578857, |
|
"logps/chosen": -4.086075782775879, |
|
"logps/rejected": -833.37255859375, |
|
"loss": 0.2328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19286975264549255, |
|
"rewards/margins": 7.908270835876465, |
|
"rewards/rejected": -7.715400695800781, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10488200774129104, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 4.999656933348981e-06, |
|
"logits/chosen": -2.244335174560547, |
|
"logits/rejected": -2.4024062156677246, |
|
"logps/chosen": -2.923116445541382, |
|
"logps/rejected": -593.464599609375, |
|
"loss": 0.241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19478723406791687, |
|
"rewards/margins": 5.600838661193848, |
|
"rewards/rejected": -5.4060516357421875, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10737919840179798, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 4.99920080255011e-06, |
|
"logits/chosen": -2.077357769012451, |
|
"logits/rejected": -2.282799243927002, |
|
"logps/chosen": -2.9383771419525146, |
|
"logps/rejected": -852.4064331054688, |
|
"loss": 0.231, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19819210469722748, |
|
"rewards/margins": 8.028984069824219, |
|
"rewards/rejected": -7.830792427062988, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1098763890623049, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 4.998554664742362e-06, |
|
"logits/chosen": -2.148183822631836, |
|
"logits/rejected": -2.3020401000976562, |
|
"logps/chosen": -1.9824367761611938, |
|
"logps/rejected": -745.6473999023438, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1946963667869568, |
|
"rewards/margins": 7.165565490722656, |
|
"rewards/rejected": -6.9708685874938965, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11237357972281184, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 4.997718569049726e-06, |
|
"logits/chosen": -2.094149351119995, |
|
"logits/rejected": -2.2727301120758057, |
|
"logps/chosen": -3.559483051300049, |
|
"logps/rejected": -817.2952270507812, |
|
"loss": 0.2319, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19730597734451294, |
|
"rewards/margins": 7.785311222076416, |
|
"rewards/rejected": -7.588005065917969, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11487077038331876, |
|
"grad_norm": 0.1171875, |
|
"learning_rate": 4.9966925790381404e-06, |
|
"logits/chosen": -2.1491434574127197, |
|
"logits/rejected": -2.301217555999756, |
|
"logps/chosen": -1.5461114645004272, |
|
"logps/rejected": -810.7796020507812, |
|
"loss": 0.2326, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19482001662254333, |
|
"rewards/margins": 7.743639945983887, |
|
"rewards/rejected": -7.548819541931152, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1173679610438257, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 4.995476772710657e-06, |
|
"logits/chosen": -2.1041364669799805, |
|
"logits/rejected": -2.3101038932800293, |
|
"logps/chosen": -3.1227645874023438, |
|
"logps/rejected": -963.2913208007812, |
|
"loss": 0.2321, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1952921450138092, |
|
"rewards/margins": 9.208600044250488, |
|
"rewards/rejected": -9.013307571411133, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11986515170433262, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 4.994071242501516e-06, |
|
"logits/chosen": -2.1944689750671387, |
|
"logits/rejected": -2.371983051300049, |
|
"logps/chosen": -2.822134494781494, |
|
"logps/rejected": -869.8029174804688, |
|
"loss": 0.2298, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19522327184677124, |
|
"rewards/margins": 8.3977632522583, |
|
"rewards/rejected": -8.20253849029541, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12236234236483956, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 4.992476095269112e-06, |
|
"logits/chosen": -2.2050843238830566, |
|
"logits/rejected": -2.3897545337677, |
|
"logps/chosen": -1.4868861436843872, |
|
"logps/rejected": -922.6173095703125, |
|
"loss": 0.2305, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2005070000886917, |
|
"rewards/margins": 8.763871192932129, |
|
"rewards/rejected": -8.563364028930664, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12485953302534648, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 4.990691452287877e-06, |
|
"logits/chosen": -2.042813777923584, |
|
"logits/rejected": -2.213289976119995, |
|
"logps/chosen": -2.393306016921997, |
|
"logps/rejected": -886.4241943359375, |
|
"loss": 0.2303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20110133290290833, |
|
"rewards/margins": 8.474591255187988, |
|
"rewards/rejected": -8.273489952087402, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1273567236858534, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 4.988717449239056e-06, |
|
"logits/chosen": -2.093723773956299, |
|
"logits/rejected": -2.2634453773498535, |
|
"logps/chosen": -1.9311176538467407, |
|
"logps/rejected": -851.02734375, |
|
"loss": 0.2347, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19648316502571106, |
|
"rewards/margins": 8.190296173095703, |
|
"rewards/rejected": -7.993813991546631, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12985391434636034, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 4.98655423620039e-06, |
|
"logits/chosen": -2.1161797046661377, |
|
"logits/rejected": -2.3049392700195312, |
|
"logps/chosen": -1.9681230783462524, |
|
"logps/rejected": -963.2742919921875, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20181334018707275, |
|
"rewards/margins": 9.243757247924805, |
|
"rewards/rejected": -9.041942596435547, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13235110500686728, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 4.984201977634711e-06, |
|
"logits/chosen": -2.223388195037842, |
|
"logits/rejected": -2.4297728538513184, |
|
"logps/chosen": -2.4097044467926025, |
|
"logps/rejected": -1106.8994140625, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2010197937488556, |
|
"rewards/margins": 10.71354866027832, |
|
"rewards/rejected": -10.512530326843262, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1348482956673742, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 4.9816608523774345e-06, |
|
"logits/chosen": -2.119506359100342, |
|
"logits/rejected": -2.305849552154541, |
|
"logps/chosen": -2.257546901702881, |
|
"logps/rejected": -930.5267333984375, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19777485728263855, |
|
"rewards/margins": 8.905478477478027, |
|
"rewards/rejected": -8.707704544067383, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13734548632788113, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 4.978931053622964e-06, |
|
"logits/chosen": -2.1544103622436523, |
|
"logits/rejected": -2.354814052581787, |
|
"logps/chosen": -1.3565616607666016, |
|
"logps/rejected": -950.23681640625, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2015368640422821, |
|
"rewards/margins": 9.141637802124023, |
|
"rewards/rejected": -8.940099716186523, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13984267698838806, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 4.9760127889100044e-06, |
|
"logits/chosen": -2.1699581146240234, |
|
"logits/rejected": -2.3422303199768066, |
|
"logps/chosen": -1.4560916423797607, |
|
"logps/rejected": -1047.3670654296875, |
|
"loss": 0.23, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19995173811912537, |
|
"rewards/margins": 10.138322830200195, |
|
"rewards/rejected": -9.938371658325195, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.142339867648895, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 4.972906280105781e-06, |
|
"logits/chosen": -2.0392138957977295, |
|
"logits/rejected": -2.2401204109191895, |
|
"logps/chosen": -2.1844277381896973, |
|
"logps/rejected": -998.3021240234375, |
|
"loss": 0.2281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20644374191761017, |
|
"rewards/margins": 9.561029434204102, |
|
"rewards/rejected": -9.354585647583008, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1448370583094019, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 4.969611763389175e-06, |
|
"logits/chosen": -2.2010245323181152, |
|
"logits/rejected": -2.3933498859405518, |
|
"logps/chosen": -2.1393237113952637, |
|
"logps/rejected": -925.5234375, |
|
"loss": 0.2289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1972513645887375, |
|
"rewards/margins": 8.949918746948242, |
|
"rewards/rejected": -8.752666473388672, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14733424896990885, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 4.966129489232762e-06, |
|
"logits/chosen": -2.1333353519439697, |
|
"logits/rejected": -2.3556675910949707, |
|
"logps/chosen": -2.2460904121398926, |
|
"logps/rejected": -1139.327392578125, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20897097885608673, |
|
"rewards/margins": 10.926295280456543, |
|
"rewards/rejected": -10.717325210571289, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14983143963041579, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 4.962459722383775e-06, |
|
"logits/chosen": -2.095088243484497, |
|
"logits/rejected": -2.2931671142578125, |
|
"logps/chosen": -2.7135472297668457, |
|
"logps/rejected": -1181.6075439453125, |
|
"loss": 0.2292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20306305587291718, |
|
"rewards/margins": 11.351381301879883, |
|
"rewards/rejected": -11.148316383361816, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15232863029092272, |
|
"grad_norm": 0.0303955078125, |
|
"learning_rate": 4.958602741843975e-06, |
|
"logits/chosen": -2.0957350730895996, |
|
"logits/rejected": -2.3226089477539062, |
|
"logps/chosen": -2.8655078411102295, |
|
"logps/rejected": -1118.1968994140625, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19633761048316956, |
|
"rewards/margins": 10.744343757629395, |
|
"rewards/rejected": -10.548004150390625, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15482582095142963, |
|
"grad_norm": 0.107421875, |
|
"learning_rate": 4.954558840848437e-06, |
|
"logits/chosen": -2.211951494216919, |
|
"logits/rejected": -2.3932459354400635, |
|
"logps/chosen": -1.5332846641540527, |
|
"logps/rejected": -932.4984130859375, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20827274024486542, |
|
"rewards/margins": 9.049389839172363, |
|
"rewards/rejected": -8.841116905212402, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15732301161193657, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 4.950328326843258e-06, |
|
"logits/chosen": -2.073488712310791, |
|
"logits/rejected": -2.2822651863098145, |
|
"logps/chosen": -0.9946017265319824, |
|
"logps/rejected": -1086.56689453125, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20773670077323914, |
|
"rewards/margins": 10.345720291137695, |
|
"rewards/rejected": -10.137983322143555, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1598202022724435, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 4.945911521462182e-06, |
|
"logits/chosen": -2.2225770950317383, |
|
"logits/rejected": -2.412863254547119, |
|
"logps/chosen": -1.7764488458633423, |
|
"logps/rejected": -1141.6427001953125, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20446841418743134, |
|
"rewards/margins": 11.000048637390137, |
|
"rewards/rejected": -10.79557991027832, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16231739293295044, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 4.941308760502149e-06, |
|
"logits/chosen": -2.211944341659546, |
|
"logits/rejected": -2.371511697769165, |
|
"logps/chosen": -2.542166233062744, |
|
"logps/rejected": -972.3176879882812, |
|
"loss": 0.2319, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.200990229845047, |
|
"rewards/margins": 9.126736640930176, |
|
"rewards/rejected": -8.925745964050293, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16481458359345735, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 4.936520393897762e-06, |
|
"logits/chosen": -2.1870148181915283, |
|
"logits/rejected": -2.4076366424560547, |
|
"logps/chosen": -2.055567979812622, |
|
"logps/rejected": -1019.7349853515625, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21482977271080017, |
|
"rewards/margins": 9.75967788696289, |
|
"rewards/rejected": -9.54484748840332, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1673117742539643, |
|
"grad_norm": 0.042724609375, |
|
"learning_rate": 4.931546785694684e-06, |
|
"logits/chosen": -2.207019090652466, |
|
"logits/rejected": -2.411149740219116, |
|
"logps/chosen": -1.447061538696289, |
|
"logps/rejected": -1274.262451171875, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2061387598514557, |
|
"rewards/margins": 12.430362701416016, |
|
"rewards/rejected": -12.224225044250488, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16980896491447123, |
|
"grad_norm": 0.031982421875, |
|
"learning_rate": 4.926388314021964e-06, |
|
"logits/chosen": -2.245506763458252, |
|
"logits/rejected": -2.439272403717041, |
|
"logps/chosen": -1.3953222036361694, |
|
"logps/rejected": -1066.398193359375, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.210698202252388, |
|
"rewards/margins": 10.378253936767578, |
|
"rewards/rejected": -10.167555809020996, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17230615557497814, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 4.921045371063283e-06, |
|
"logits/chosen": -2.235975980758667, |
|
"logits/rejected": -2.42988920211792, |
|
"logps/chosen": -0.8631747961044312, |
|
"logps/rejected": -1208.173095703125, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2110184133052826, |
|
"rewards/margins": 11.736184120178223, |
|
"rewards/rejected": -11.525165557861328, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17480334623548507, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 4.915518363027142e-06, |
|
"logits/chosen": -2.29992938041687, |
|
"logits/rejected": -2.4797685146331787, |
|
"logps/chosen": -0.5947138667106628, |
|
"logps/rejected": -1052.22216796875, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2057635486125946, |
|
"rewards/margins": 10.251365661621094, |
|
"rewards/rejected": -10.045602798461914, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.177300536895992, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 4.909807710115977e-06, |
|
"logits/chosen": -2.0681312084198, |
|
"logits/rejected": -2.245760202407837, |
|
"logps/chosen": -1.667133092880249, |
|
"logps/rejected": -1234.741943359375, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19957289099693298, |
|
"rewards/margins": 12.013802528381348, |
|
"rewards/rejected": -11.814229011535645, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.17979772755649895, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 4.903913846494211e-06, |
|
"logits/chosen": -2.0854830741882324, |
|
"logits/rejected": -2.318626880645752, |
|
"logps/chosen": -1.4859822988510132, |
|
"logps/rejected": -1401.390625, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21390756964683533, |
|
"rewards/margins": 13.512557983398438, |
|
"rewards/rejected": -13.298650741577148, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18229491821700586, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 4.897837220255251e-06, |
|
"logits/chosen": -2.105733633041382, |
|
"logits/rejected": -2.273578643798828, |
|
"logps/chosen": -1.5127496719360352, |
|
"logps/rejected": -1189.6934814453125, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21006134152412415, |
|
"rewards/margins": 11.488363265991211, |
|
"rewards/rejected": -11.278302192687988, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.1847921088775128, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 4.891578293387413e-06, |
|
"logits/chosen": -2.1760973930358887, |
|
"logits/rejected": -2.3570103645324707, |
|
"logps/chosen": -1.769789695739746, |
|
"logps/rejected": -1201.271240234375, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20501787960529327, |
|
"rewards/margins": 11.696678161621094, |
|
"rewards/rejected": -11.491661071777344, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.18728929953801973, |
|
"grad_norm": 0.031982421875, |
|
"learning_rate": 4.885137541738808e-06, |
|
"logits/chosen": -2.141007423400879, |
|
"logits/rejected": -2.313952922821045, |
|
"logps/chosen": -0.702928900718689, |
|
"logps/rejected": -1086.88330078125, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20619484782218933, |
|
"rewards/margins": 10.44408893585205, |
|
"rewards/rejected": -10.237894058227539, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18978649019852667, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 4.878515454981153e-06, |
|
"logits/chosen": -2.0163445472717285, |
|
"logits/rejected": -2.219290256500244, |
|
"logps/chosen": -1.4322102069854736, |
|
"logps/rejected": -1299.561767578125, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20874173939228058, |
|
"rewards/margins": 12.519464492797852, |
|
"rewards/rejected": -12.310722351074219, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.19228368085903358, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 4.8717125365725545e-06, |
|
"logits/chosen": -2.2308189868927, |
|
"logits/rejected": -2.3827383518218994, |
|
"logps/chosen": -1.321045160293579, |
|
"logps/rejected": -954.9481201171875, |
|
"loss": 0.2298, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2083440124988556, |
|
"rewards/margins": 9.28177547454834, |
|
"rewards/rejected": -9.073431015014648, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19478087151954052, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 4.864729303719221e-06, |
|
"logits/chosen": -2.1831257343292236, |
|
"logits/rejected": -2.386863946914673, |
|
"logps/chosen": -1.462869644165039, |
|
"logps/rejected": -1309.128662109375, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21388690173625946, |
|
"rewards/margins": 12.65107250213623, |
|
"rewards/rejected": -12.437185287475586, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.19727806218004745, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.857566287336152e-06, |
|
"logits/chosen": -2.125136375427246, |
|
"logits/rejected": -2.3306586742401123, |
|
"logps/chosen": -1.5712594985961914, |
|
"logps/rejected": -1211.277587890625, |
|
"loss": 0.2289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21211902797222137, |
|
"rewards/margins": 11.6867094039917, |
|
"rewards/rejected": -11.474590301513672, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19977525284055436, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 4.850224032006765e-06, |
|
"logits/chosen": -2.226292610168457, |
|
"logits/rejected": -2.4260332584381104, |
|
"logps/chosen": -1.096842885017395, |
|
"logps/rejected": -1190.5208740234375, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21329161524772644, |
|
"rewards/margins": 11.604973793029785, |
|
"rewards/rejected": -11.391681671142578, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2022724435010613, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 4.8427030959414984e-06, |
|
"logits/chosen": -2.0340332984924316, |
|
"logits/rejected": -2.239582061767578, |
|
"logps/chosen": -1.4298118352890015, |
|
"logps/rejected": -1246.587158203125, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.203706294298172, |
|
"rewards/margins": 12.136808395385742, |
|
"rewards/rejected": -11.933099746704102, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.20476963416156824, |
|
"grad_norm": 0.0400390625, |
|
"learning_rate": 4.835004050935369e-06, |
|
"logits/chosen": -2.142270803451538, |
|
"logits/rejected": -2.3261685371398926, |
|
"logps/chosen": -2.205761432647705, |
|
"logps/rejected": -1209.187744140625, |
|
"loss": 0.2294, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21324896812438965, |
|
"rewards/margins": 11.711974143981934, |
|
"rewards/rejected": -11.498725891113281, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.20726682482207517, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 4.8271274823245e-06, |
|
"logits/chosen": -2.130068778991699, |
|
"logits/rejected": -2.303924083709717, |
|
"logps/chosen": -1.5450295209884644, |
|
"logps/rejected": -1218.6636962890625, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2086503505706787, |
|
"rewards/margins": 11.845584869384766, |
|
"rewards/rejected": -11.636935234069824, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20976401548258208, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 4.8190739889416264e-06, |
|
"logits/chosen": -2.1227643489837646, |
|
"logits/rejected": -2.3156332969665527, |
|
"logps/chosen": -1.4759693145751953, |
|
"logps/rejected": -1314.2388916015625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21140392124652863, |
|
"rewards/margins": 12.794939994812012, |
|
"rewards/rejected": -12.583536148071289, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21226120614308902, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 4.810844183070553e-06, |
|
"logits/chosen": -2.2195773124694824, |
|
"logits/rejected": -2.416642665863037, |
|
"logps/chosen": -1.3944060802459717, |
|
"logps/rejected": -1100.637939453125, |
|
"loss": 0.2267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20567412674427032, |
|
"rewards/margins": 10.635955810546875, |
|
"rewards/rejected": -10.430280685424805, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21475839680359596, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 4.802438690399622e-06, |
|
"logits/chosen": -2.170403480529785, |
|
"logits/rejected": -2.3731253147125244, |
|
"logps/chosen": -0.7113627195358276, |
|
"logps/rejected": -1192.8896484375, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20982804894447327, |
|
"rewards/margins": 11.532899856567383, |
|
"rewards/rejected": -11.32307243347168, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2172555874641029, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 4.793858149974129e-06, |
|
"logits/chosen": -2.134357452392578, |
|
"logits/rejected": -2.3488316535949707, |
|
"logps/chosen": -1.1498069763183594, |
|
"logps/rejected": -1405.57177734375, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2079639434814453, |
|
"rewards/margins": 13.713908195495605, |
|
"rewards/rejected": -13.505943298339844, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2197527781246098, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 4.785103214147747e-06, |
|
"logits/chosen": -2.244509220123291, |
|
"logits/rejected": -2.446852445602417, |
|
"logps/chosen": -1.082582950592041, |
|
"logps/rejected": -1192.0093994140625, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20774182677268982, |
|
"rewards/margins": 11.592524528503418, |
|
"rewards/rejected": -11.384782791137695, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.22224996878511674, |
|
"grad_norm": 0.0118408203125, |
|
"learning_rate": 4.776174548532926e-06, |
|
"logits/chosen": -2.1576988697052, |
|
"logits/rejected": -2.3463644981384277, |
|
"logps/chosen": -1.1917221546173096, |
|
"logps/rejected": -1265.5885009765625, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20792751014232635, |
|
"rewards/margins": 12.278467178344727, |
|
"rewards/rejected": -12.070539474487305, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22474715944562368, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 4.767072831950288e-06, |
|
"logits/chosen": -2.2008862495422363, |
|
"logits/rejected": -2.402891159057617, |
|
"logps/chosen": -1.2017600536346436, |
|
"logps/rejected": -1313.045654296875, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2119072675704956, |
|
"rewards/margins": 12.807563781738281, |
|
"rewards/rejected": -12.59565544128418, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.22724435010613062, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 4.7577987563770226e-06, |
|
"logits/chosen": -2.1067652702331543, |
|
"logits/rejected": -2.324591875076294, |
|
"logps/chosen": -2.000681161880493, |
|
"logps/rejected": -1264.68115234375, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2102380096912384, |
|
"rewards/margins": 12.193601608276367, |
|
"rewards/rejected": -11.983363151550293, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22974154076663753, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 4.748353026894273e-06, |
|
"logits/chosen": -2.1624951362609863, |
|
"logits/rejected": -2.3448517322540283, |
|
"logps/chosen": -1.4960781335830688, |
|
"logps/rejected": -1188.14990234375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2139265537261963, |
|
"rewards/margins": 11.510043144226074, |
|
"rewards/rejected": -11.29611587524414, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23223873142714446, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 4.738736361633532e-06, |
|
"logits/chosen": -2.25258207321167, |
|
"logits/rejected": -2.4271512031555176, |
|
"logps/chosen": -1.7973697185516357, |
|
"logps/rejected": -1126.24267578125, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20842309296131134, |
|
"rewards/margins": 10.903474807739258, |
|
"rewards/rejected": -10.695051193237305, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2347359220876514, |
|
"grad_norm": 0.06103515625, |
|
"learning_rate": 4.728949491722046e-06, |
|
"logits/chosen": -2.274840831756592, |
|
"logits/rejected": -2.4521872997283936, |
|
"logps/chosen": -0.652289092540741, |
|
"logps/rejected": -1062.56494140625, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20718173682689667, |
|
"rewards/margins": 10.335628509521484, |
|
"rewards/rejected": -10.128446578979492, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2372331127481583, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 4.718993161227231e-06, |
|
"logits/chosen": -2.172180414199829, |
|
"logits/rejected": -2.4125022888183594, |
|
"logps/chosen": -1.2400215864181519, |
|
"logps/rejected": -1376.037841796875, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21632233262062073, |
|
"rewards/margins": 13.414273262023926, |
|
"rewards/rejected": -13.197952270507812, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23973030340866525, |
|
"grad_norm": 0.00982666015625, |
|
"learning_rate": 4.708868127100098e-06, |
|
"logits/chosen": -2.2069010734558105, |
|
"logits/rejected": -2.3836076259613037, |
|
"logps/chosen": -0.6828838586807251, |
|
"logps/rejected": -1159.0107421875, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20559605956077576, |
|
"rewards/margins": 11.286005973815918, |
|
"rewards/rejected": -11.080410957336426, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24222749406917218, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 4.6985751591177075e-06, |
|
"logits/chosen": -2.0572152137756348, |
|
"logits/rejected": -2.2502310276031494, |
|
"logps/chosen": -1.7850786447525024, |
|
"logps/rejected": -1321.8499755859375, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.212154358625412, |
|
"rewards/margins": 12.830732345581055, |
|
"rewards/rejected": -12.618578910827637, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24472468472967912, |
|
"grad_norm": 0.09716796875, |
|
"learning_rate": 4.688115039824648e-06, |
|
"logits/chosen": -2.1182241439819336, |
|
"logits/rejected": -2.292884349822998, |
|
"logps/chosen": -0.9138596653938293, |
|
"logps/rejected": -1220.1195068359375, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2072029858827591, |
|
"rewards/margins": 11.845842361450195, |
|
"rewards/rejected": -11.638639450073242, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.24722187539018603, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 4.677488564473535e-06, |
|
"logits/chosen": -2.076742649078369, |
|
"logits/rejected": -2.280050754547119, |
|
"logps/chosen": -2.1341259479522705, |
|
"logps/rejected": -1361.389404296875, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20143508911132812, |
|
"rewards/margins": 13.227249145507812, |
|
"rewards/rejected": -13.0258150100708, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.24971906605069297, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 4.666696540964556e-06, |
|
"logits/chosen": -2.205030918121338, |
|
"logits/rejected": -2.380605697631836, |
|
"logps/chosen": -1.0865452289581299, |
|
"logps/rejected": -1183.8802490234375, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21310412883758545, |
|
"rewards/margins": 11.559179306030273, |
|
"rewards/rejected": -11.346075057983398, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24971906605069297, |
|
"eval_logits/chosen": -2.551421880722046, |
|
"eval_logits/rejected": -2.637223482131958, |
|
"eval_logps/chosen": -0.39880600571632385, |
|
"eval_logps/rejected": -585.1870727539062, |
|
"eval_loss": 0.22298085689544678, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.25514695048332214, |
|
"eval_rewards/margins": 5.658298015594482, |
|
"eval_rewards/rejected": -5.403151035308838, |
|
"eval_runtime": 0.6597, |
|
"eval_samples_per_second": 7.579, |
|
"eval_steps_per_second": 4.548, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2522162567111999, |
|
"grad_norm": 0.0361328125, |
|
"learning_rate": 4.6557397897840454e-06, |
|
"logits/chosen": -2.226627826690674, |
|
"logits/rejected": -2.434197187423706, |
|
"logps/chosen": -1.4807536602020264, |
|
"logps/rejected": -1233.5753173828125, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21030649542808533, |
|
"rewards/margins": 11.924067497253418, |
|
"rewards/rejected": -11.713762283325195, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2547134473717068, |
|
"grad_norm": 0.0311279296875, |
|
"learning_rate": 4.644619143942108e-06, |
|
"logits/chosen": -2.1962525844573975, |
|
"logits/rejected": -2.418130397796631, |
|
"logps/chosen": -1.2743520736694336, |
|
"logps/rejected": -1324.01123046875, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2114056795835495, |
|
"rewards/margins": 12.735904693603516, |
|
"rewards/rejected": -12.524497985839844, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2572106380322138, |
|
"grad_norm": 0.1162109375, |
|
"learning_rate": 4.633335448909284e-06, |
|
"logits/chosen": -2.0575506687164307, |
|
"logits/rejected": -2.2430522441864014, |
|
"logps/chosen": -1.6322782039642334, |
|
"logps/rejected": -1251.030029296875, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21490998566150665, |
|
"rewards/margins": 12.10401725769043, |
|
"rewards/rejected": -11.889106750488281, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2597078286927207, |
|
"grad_norm": 0.10400390625, |
|
"learning_rate": 4.621889562552272e-06, |
|
"logits/chosen": -2.1623690128326416, |
|
"logits/rejected": -2.387530565261841, |
|
"logps/chosen": -1.5265319347381592, |
|
"logps/rejected": -1406.755615234375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21330364048480988, |
|
"rewards/margins": 13.666200637817383, |
|
"rewards/rejected": -13.452896118164062, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2622050193532276, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 4.610282355068707e-06, |
|
"logits/chosen": -2.265820264816284, |
|
"logits/rejected": -2.481659412384033, |
|
"logps/chosen": -1.5380371809005737, |
|
"logps/rejected": -1449.8046875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2142389565706253, |
|
"rewards/margins": 14.062037467956543, |
|
"rewards/rejected": -13.847798347473145, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.26470221001373456, |
|
"grad_norm": 0.06787109375, |
|
"learning_rate": 4.598514708921006e-06, |
|
"logits/chosen": -2.249868869781494, |
|
"logits/rejected": -2.466034412384033, |
|
"logps/chosen": -0.7143852710723877, |
|
"logps/rejected": -1382.494140625, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2079528272151947, |
|
"rewards/margins": 13.4636812210083, |
|
"rewards/rejected": -13.255727767944336, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.26719940067424147, |
|
"grad_norm": 0.01153564453125, |
|
"learning_rate": 4.5865875187692695e-06, |
|
"logits/chosen": -2.1900734901428223, |
|
"logits/rejected": -2.3761203289031982, |
|
"logps/chosen": -1.549536943435669, |
|
"logps/rejected": -1185.685791015625, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20429477095603943, |
|
"rewards/margins": 11.534225463867188, |
|
"rewards/rejected": -11.32992935180664, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2696965913347484, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 4.57450169140327e-06, |
|
"logits/chosen": -2.0554583072662354, |
|
"logits/rejected": -2.273556709289551, |
|
"logps/chosen": -1.3945400714874268, |
|
"logps/rejected": -1522.8463134765625, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2084466516971588, |
|
"rewards/margins": 14.89411449432373, |
|
"rewards/rejected": -14.685667037963867, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.27219378199525535, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 4.562258145673507e-06, |
|
"logits/chosen": -2.20988392829895, |
|
"logits/rejected": -2.4358487129211426, |
|
"logps/chosen": -1.0550658702850342, |
|
"logps/rejected": -1489.2562255859375, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20840421319007874, |
|
"rewards/margins": 14.553556442260742, |
|
"rewards/rejected": -14.34515380859375, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.27469097265576226, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 4.549857812421353e-06, |
|
"logits/chosen": -2.1285512447357178, |
|
"logits/rejected": -2.318908929824829, |
|
"logps/chosen": -0.753593921661377, |
|
"logps/rejected": -1319.107666015625, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20573386549949646, |
|
"rewards/margins": 12.884170532226562, |
|
"rewards/rejected": -12.678436279296875, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2771881633162692, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 4.537301634408281e-06, |
|
"logits/chosen": -2.1442999839782715, |
|
"logits/rejected": -2.34287691116333, |
|
"logps/chosen": -0.9622041583061218, |
|
"logps/rejected": -1223.08837890625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21334879100322723, |
|
"rewards/margins": 11.921293258666992, |
|
"rewards/rejected": -11.707944869995117, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.27968535397677613, |
|
"grad_norm": 0.027099609375, |
|
"learning_rate": 4.52459056624419e-06, |
|
"logits/chosen": -2.198021173477173, |
|
"logits/rejected": -2.3665783405303955, |
|
"logps/chosen": -1.6707994937896729, |
|
"logps/rejected": -1209.2952880859375, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20634672045707703, |
|
"rewards/margins": 11.70842170715332, |
|
"rewards/rejected": -11.502074241638184, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28218254463728304, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 4.51172557431483e-06, |
|
"logits/chosen": -2.0804460048675537, |
|
"logits/rejected": -2.27351713180542, |
|
"logps/chosen": -1.3884862661361694, |
|
"logps/rejected": -1267.9599609375, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20677968859672546, |
|
"rewards/margins": 12.219032287597656, |
|
"rewards/rejected": -12.012252807617188, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.28467973529779, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 4.49870763670833e-06, |
|
"logits/chosen": -2.1440179347991943, |
|
"logits/rejected": -2.3646531105041504, |
|
"logps/chosen": -0.9940131306648254, |
|
"logps/rejected": -1360.1025390625, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2132159024477005, |
|
"rewards/margins": 13.244120597839355, |
|
"rewards/rejected": -13.030904769897461, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2871769259582969, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 4.4855377431408335e-06, |
|
"logits/chosen": -2.124523639678955, |
|
"logits/rejected": -2.308046817779541, |
|
"logps/chosen": -1.051758885383606, |
|
"logps/rejected": -1258.587158203125, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21350452303886414, |
|
"rewards/margins": 12.086160659790039, |
|
"rewards/rejected": -11.872655868530273, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2896741166188038, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 4.472216894881261e-06, |
|
"logits/chosen": -2.12388277053833, |
|
"logits/rejected": -2.2992734909057617, |
|
"logps/chosen": -1.0673718452453613, |
|
"logps/rejected": -1227.642822265625, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21056973934173584, |
|
"rewards/margins": 11.97436809539795, |
|
"rewards/rejected": -11.763797760009766, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2921713072793108, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 4.4587461046751815e-06, |
|
"logits/chosen": -2.165827512741089, |
|
"logits/rejected": -2.366560697555542, |
|
"logps/chosen": -1.3018419742584229, |
|
"logps/rejected": -1152.0526123046875, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2166510820388794, |
|
"rewards/margins": 11.213326454162598, |
|
"rewards/rejected": -10.996675491333008, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2946684979398177, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 4.44512639666781e-06, |
|
"logits/chosen": -2.153282642364502, |
|
"logits/rejected": -2.3281288146972656, |
|
"logps/chosen": -0.8735140562057495, |
|
"logps/rejected": -1144.37744140625, |
|
"loss": 0.2288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20929470658302307, |
|
"rewards/margins": 11.165544509887695, |
|
"rewards/rejected": -10.956250190734863, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.29716568860032466, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 4.431358806326158e-06, |
|
"logits/chosen": -2.0921244621276855, |
|
"logits/rejected": -2.2888898849487305, |
|
"logps/chosen": -1.9632396697998047, |
|
"logps/rejected": -1334.217041015625, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21045899391174316, |
|
"rewards/margins": 12.872146606445312, |
|
"rewards/rejected": -12.661687850952148, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.29966287926083157, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 4.4174443803603e-06, |
|
"logits/chosen": -2.1807141304016113, |
|
"logits/rejected": -2.35149884223938, |
|
"logps/chosen": -1.1249208450317383, |
|
"logps/rejected": -1231.4007568359375, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2088349312543869, |
|
"rewards/margins": 11.981757164001465, |
|
"rewards/rejected": -11.772923469543457, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3021600699213385, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 4.4033841766438e-06, |
|
"logits/chosen": -2.153378486633301, |
|
"logits/rejected": -2.333552598953247, |
|
"logps/chosen": -1.4812664985656738, |
|
"logps/rejected": -1186.764404296875, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21333375573158264, |
|
"rewards/margins": 11.490147590637207, |
|
"rewards/rejected": -11.276814460754395, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.30465726058184545, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 4.389179264133281e-06, |
|
"logits/chosen": -2.232697010040283, |
|
"logits/rejected": -2.418818235397339, |
|
"logps/chosen": -0.8499106168746948, |
|
"logps/rejected": -1287.507568359375, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20431029796600342, |
|
"rewards/margins": 12.579316139221191, |
|
"rewards/rejected": -12.375005722045898, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.30715445124235236, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 4.374830722787159e-06, |
|
"logits/chosen": -2.2435195446014404, |
|
"logits/rejected": -2.4646503925323486, |
|
"logps/chosen": -0.5742496252059937, |
|
"logps/rejected": -1343.397216796875, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2151050567626953, |
|
"rewards/margins": 13.130419731140137, |
|
"rewards/rejected": -12.915315628051758, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.30965164190285926, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 4.360339643483533e-06, |
|
"logits/chosen": -2.2148001194000244, |
|
"logits/rejected": -2.421738862991333, |
|
"logps/chosen": -1.9802653789520264, |
|
"logps/rejected": -1262.169189453125, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20881418883800507, |
|
"rewards/margins": 12.177266120910645, |
|
"rewards/rejected": -11.968450546264648, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.31214883256336623, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 4.345707127937253e-06, |
|
"logits/chosen": -2.1191718578338623, |
|
"logits/rejected": -2.344691753387451, |
|
"logps/chosen": -0.9136890172958374, |
|
"logps/rejected": -1512.323974609375, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21416035294532776, |
|
"rewards/margins": 14.802743911743164, |
|
"rewards/rejected": -14.588582992553711, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31464602322387314, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 4.330934288616154e-06, |
|
"logits/chosen": -2.1469109058380127, |
|
"logits/rejected": -2.3361592292785645, |
|
"logps/chosen": -1.4744806289672852, |
|
"logps/rejected": -1288.8616943359375, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2088310271501541, |
|
"rewards/margins": 12.5834379196167, |
|
"rewards/rejected": -12.374608039855957, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.31714321388438005, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 4.316022248656485e-06, |
|
"logits/chosen": -2.0783493518829346, |
|
"logits/rejected": -2.3048255443573, |
|
"logps/chosen": -1.100656270980835, |
|
"logps/rejected": -1277.9552001953125, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20990002155303955, |
|
"rewards/margins": 12.193166732788086, |
|
"rewards/rejected": -11.983266830444336, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.319640404544887, |
|
"grad_norm": 0.0400390625, |
|
"learning_rate": 4.3009721417775166e-06, |
|
"logits/chosen": -2.1016387939453125, |
|
"logits/rejected": -2.3064982891082764, |
|
"logps/chosen": -1.263979196548462, |
|
"logps/rejected": -1323.89599609375, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21145665645599365, |
|
"rewards/margins": 12.815747261047363, |
|
"rewards/rejected": -12.604291915893555, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3221375952053939, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 4.285785112195346e-06, |
|
"logits/chosen": -2.188570976257324, |
|
"logits/rejected": -2.397493600845337, |
|
"logps/chosen": -2.353158473968506, |
|
"logps/rejected": -1393.356201171875, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20123986899852753, |
|
"rewards/margins": 13.566085815429688, |
|
"rewards/rejected": -13.364847183227539, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3246347858659009, |
|
"grad_norm": 0.04345703125, |
|
"learning_rate": 4.27046231453591e-06, |
|
"logits/chosen": -2.115800142288208, |
|
"logits/rejected": -2.314438819885254, |
|
"logps/chosen": -1.3714869022369385, |
|
"logps/rejected": -1331.2506103515625, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20796707272529602, |
|
"rewards/margins": 12.886337280273438, |
|
"rewards/rejected": -12.678369522094727, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3271319765264078, |
|
"grad_norm": 0.0322265625, |
|
"learning_rate": 4.255004913747196e-06, |
|
"logits/chosen": -2.1591382026672363, |
|
"logits/rejected": -2.3501150608062744, |
|
"logps/chosen": -0.8996777534484863, |
|
"logps/rejected": -1417.157470703125, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2074543684720993, |
|
"rewards/margins": 13.838354110717773, |
|
"rewards/rejected": -13.630900382995605, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.3296291671869147, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 4.2394140850106825e-06, |
|
"logits/chosen": -2.0840930938720703, |
|
"logits/rejected": -2.285808801651001, |
|
"logps/chosen": -0.9041382670402527, |
|
"logps/rejected": -1322.038818359375, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2139444649219513, |
|
"rewards/margins": 12.818634033203125, |
|
"rewards/rejected": -12.604690551757812, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33212635784742167, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 4.223691013651986e-06, |
|
"logits/chosen": -2.141530990600586, |
|
"logits/rejected": -2.363454580307007, |
|
"logps/chosen": -2.294220209121704, |
|
"logps/rejected": -1329.7213134765625, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2177181988954544, |
|
"rewards/margins": 12.63646411895752, |
|
"rewards/rejected": -12.418745040893555, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3346235485079286, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 4.207836895050748e-06, |
|
"logits/chosen": -2.263815402984619, |
|
"logits/rejected": -2.524907350540161, |
|
"logps/chosen": -0.85591059923172, |
|
"logps/rejected": -1496.051513671875, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21331222355365753, |
|
"rewards/margins": 14.541677474975586, |
|
"rewards/rejected": -14.32836627960205, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.3371207391684355, |
|
"grad_norm": 0.056640625, |
|
"learning_rate": 4.1918529345497525e-06, |
|
"logits/chosen": -2.1795644760131836, |
|
"logits/rejected": -2.345736026763916, |
|
"logps/chosen": -1.1188920736312866, |
|
"logps/rejected": -1032.299560546875, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21017661690711975, |
|
"rewards/margins": 9.974283218383789, |
|
"rewards/rejected": -9.764104843139648, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.33961792982894246, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 4.175740347363289e-06, |
|
"logits/chosen": -2.2571511268615723, |
|
"logits/rejected": -2.450302839279175, |
|
"logps/chosen": -2.4634203910827637, |
|
"logps/rejected": -1143.845703125, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20782017707824707, |
|
"rewards/margins": 10.989904403686523, |
|
"rewards/rejected": -10.782083511352539, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.34211512048944936, |
|
"grad_norm": 0.021240234375, |
|
"learning_rate": 4.159500358484759e-06, |
|
"logits/chosen": -2.104897975921631, |
|
"logits/rejected": -2.321760654449463, |
|
"logps/chosen": -1.1564667224884033, |
|
"logps/rejected": -1532.8436279296875, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21173422038555145, |
|
"rewards/margins": 14.948999404907227, |
|
"rewards/rejected": -14.737266540527344, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3446123111499563, |
|
"grad_norm": 0.0306396484375, |
|
"learning_rate": 4.143134202593549e-06, |
|
"logits/chosen": -2.1347815990448, |
|
"logits/rejected": -2.3222789764404297, |
|
"logps/chosen": -2.063771963119507, |
|
"logps/rejected": -1179.3240966796875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.216390922665596, |
|
"rewards/margins": 11.309762001037598, |
|
"rewards/rejected": -11.093371391296387, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.34710950181046324, |
|
"grad_norm": 0.03955078125, |
|
"learning_rate": 4.126643123961158e-06, |
|
"logits/chosen": -2.216097354888916, |
|
"logits/rejected": -2.431462049484253, |
|
"logps/chosen": -1.3367359638214111, |
|
"logps/rejected": -1441.5928955078125, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2119736224412918, |
|
"rewards/margins": 14.054840087890625, |
|
"rewards/rejected": -13.842867851257324, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.34960669247097015, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 4.110028376356599e-06, |
|
"logits/chosen": -2.194693088531494, |
|
"logits/rejected": -2.394153118133545, |
|
"logps/chosen": -2.143383264541626, |
|
"logps/rejected": -1089.128173828125, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2104116678237915, |
|
"rewards/margins": 10.493813514709473, |
|
"rewards/rejected": -10.283400535583496, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3521038831314771, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 4.093291222951079e-06, |
|
"logits/chosen": -2.1454501152038574, |
|
"logits/rejected": -2.360769033432007, |
|
"logps/chosen": -1.1339516639709473, |
|
"logps/rejected": -1363.47119140625, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.209524005651474, |
|
"rewards/margins": 13.2172269821167, |
|
"rewards/rejected": -13.007702827453613, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.354601073791984, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 4.076432936221965e-06, |
|
"logits/chosen": -2.135999917984009, |
|
"logits/rejected": -2.3061912059783936, |
|
"logps/chosen": -0.5820466876029968, |
|
"logps/rejected": -1179.7847900390625, |
|
"loss": 0.2283, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2054443657398224, |
|
"rewards/margins": 11.520200729370117, |
|
"rewards/rejected": -11.314754486083984, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.35709826445249093, |
|
"grad_norm": 0.019775390625, |
|
"learning_rate": 4.059454797856039e-06, |
|
"logits/chosen": -2.172046184539795, |
|
"logits/rejected": -2.342928171157837, |
|
"logps/chosen": -0.7546096444129944, |
|
"logps/rejected": -1167.744873046875, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20582588016986847, |
|
"rewards/margins": 11.390329360961914, |
|
"rewards/rejected": -11.184503555297852, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3595954551129979, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 4.042358098652057e-06, |
|
"logits/chosen": -2.244403123855591, |
|
"logits/rejected": -2.4426932334899902, |
|
"logps/chosen": -1.5733036994934082, |
|
"logps/rejected": -1163.822998046875, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21082696318626404, |
|
"rewards/margins": 11.297124862670898, |
|
"rewards/rejected": -11.086297988891602, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3620926457735048, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 4.025144138422615e-06, |
|
"logits/chosen": -2.189898729324341, |
|
"logits/rejected": -2.393465757369995, |
|
"logps/chosen": -1.2910453081130981, |
|
"logps/rejected": -1412.8597412109375, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21461701393127441, |
|
"rewards/margins": 13.799296379089355, |
|
"rewards/rejected": -13.584680557250977, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3645898364340117, |
|
"grad_norm": 0.0169677734375, |
|
"learning_rate": 4.007814225895321e-06, |
|
"logits/chosen": -2.170092821121216, |
|
"logits/rejected": -2.3824923038482666, |
|
"logps/chosen": -0.8392337560653687, |
|
"logps/rejected": -1365.531005859375, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20813941955566406, |
|
"rewards/margins": 13.32819652557373, |
|
"rewards/rejected": -13.120054244995117, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3670870270945187, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 3.990369678613303e-06, |
|
"logits/chosen": -2.0936970710754395, |
|
"logits/rejected": -2.3042235374450684, |
|
"logps/chosen": -1.4599825143814087, |
|
"logps/rejected": -1356.390869140625, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21089884638786316, |
|
"rewards/margins": 12.903648376464844, |
|
"rewards/rejected": -12.6927490234375, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3695842177550256, |
|
"grad_norm": 0.0240478515625, |
|
"learning_rate": 3.97281182283504e-06, |
|
"logits/chosen": -2.157559871673584, |
|
"logits/rejected": -2.371856927871704, |
|
"logps/chosen": -1.3865526914596558, |
|
"logps/rejected": -1416.440185546875, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20923642814159393, |
|
"rewards/margins": 13.767707824707031, |
|
"rewards/rejected": -13.558469772338867, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3720814084155325, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 3.955141993433526e-06, |
|
"logits/chosen": -2.2016472816467285, |
|
"logits/rejected": -2.3889071941375732, |
|
"logps/chosen": -1.0489656925201416, |
|
"logps/rejected": -1286.4302978515625, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21162299811840057, |
|
"rewards/margins": 12.558609962463379, |
|
"rewards/rejected": -12.3469877243042, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.37457859907603946, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 3.937361533794784e-06, |
|
"logits/chosen": -2.1290640830993652, |
|
"logits/rejected": -2.337486505508423, |
|
"logps/chosen": -1.496525526046753, |
|
"logps/rejected": -1124.3212890625, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21092331409454346, |
|
"rewards/margins": 10.851540565490723, |
|
"rewards/rejected": -10.640616416931152, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3770757897365464, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 3.919471795715738e-06, |
|
"logits/chosen": -2.18410587310791, |
|
"logits/rejected": -2.3675644397735596, |
|
"logps/chosen": -0.84355628490448, |
|
"logps/rejected": -1166.61279296875, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2062278687953949, |
|
"rewards/margins": 11.381316184997559, |
|
"rewards/rejected": -11.175088882446289, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.37957298039705334, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 3.901474139301433e-06, |
|
"logits/chosen": -2.0796847343444824, |
|
"logits/rejected": -2.264577627182007, |
|
"logps/chosen": -0.6843720078468323, |
|
"logps/rejected": -1241.1590576171875, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21053218841552734, |
|
"rewards/margins": 12.031414031982422, |
|
"rewards/rejected": -11.820880889892578, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.38207017105756025, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 3.883369932861634e-06, |
|
"logits/chosen": -2.2165303230285645, |
|
"logits/rejected": -2.3859565258026123, |
|
"logps/chosen": -1.1263262033462524, |
|
"logps/rejected": -1200.8397216796875, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20847392082214355, |
|
"rewards/margins": 11.745917320251465, |
|
"rewards/rejected": -11.537444114685059, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.38456736171806716, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 3.865160552806796e-06, |
|
"logits/chosen": -2.262539863586426, |
|
"logits/rejected": -2.4538345336914062, |
|
"logps/chosen": -1.3924305438995361, |
|
"logps/rejected": -1240.5035400390625, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20712998509407043, |
|
"rewards/margins": 12.123323440551758, |
|
"rewards/rejected": -11.916193008422852, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3870645523785741, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 3.84684738354342e-06, |
|
"logits/chosen": -2.267106771469116, |
|
"logits/rejected": -2.4566650390625, |
|
"logps/chosen": -2.0142922401428223, |
|
"logps/rejected": -1211.2545166015625, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2093096524477005, |
|
"rewards/margins": 11.762309074401855, |
|
"rewards/rejected": -11.552999496459961, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.38956174303908103, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 3.828431817368798e-06, |
|
"logits/chosen": -2.141620397567749, |
|
"logits/rejected": -2.33925199508667, |
|
"logps/chosen": -1.531597375869751, |
|
"logps/rejected": -1257.968994140625, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2043263465166092, |
|
"rewards/margins": 12.106410026550293, |
|
"rewards/rejected": -11.902084350585938, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39205893369958794, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 3.8099152543651684e-06, |
|
"logits/chosen": -2.3559296131134033, |
|
"logits/rejected": -2.583070993423462, |
|
"logps/chosen": -0.7891671061515808, |
|
"logps/rejected": -1441.2958984375, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20671992003917694, |
|
"rewards/margins": 14.086430549621582, |
|
"rewards/rejected": -13.87971019744873, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3945561243600949, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 3.791299102293261e-06, |
|
"logits/chosen": -2.1035549640655518, |
|
"logits/rejected": -2.3072731494903564, |
|
"logps/chosen": -1.0839884281158447, |
|
"logps/rejected": -1459.4197998046875, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21341785788536072, |
|
"rewards/margins": 14.197916984558105, |
|
"rewards/rejected": -13.98449993133545, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3970533150206018, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 3.7725847764852774e-06, |
|
"logits/chosen": -2.10914945602417, |
|
"logits/rejected": -2.3385162353515625, |
|
"logps/chosen": -1.6078799962997437, |
|
"logps/rejected": -1307.208740234375, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2162178009748459, |
|
"rewards/margins": 12.54298210144043, |
|
"rewards/rejected": -12.326765060424805, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3995505056811087, |
|
"grad_norm": 0.0267333984375, |
|
"learning_rate": 3.7537736997372833e-06, |
|
"logits/chosen": -2.1722114086151123, |
|
"logits/rejected": -2.3555681705474854, |
|
"logps/chosen": -1.133063793182373, |
|
"logps/rejected": -1113.764404296875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21135945618152618, |
|
"rewards/margins": 10.682828903198242, |
|
"rewards/rejected": -10.471468925476074, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4020476963416157, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 3.734867302201038e-06, |
|
"logits/chosen": -2.2481324672698975, |
|
"logits/rejected": -2.4178614616394043, |
|
"logps/chosen": -0.7748688459396362, |
|
"logps/rejected": -1153.1929931640625, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2061961144208908, |
|
"rewards/margins": 11.231634140014648, |
|
"rewards/rejected": -11.02543830871582, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4045448870021226, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 3.7158670212752666e-06, |
|
"logits/chosen": -2.158440113067627, |
|
"logits/rejected": -2.3695878982543945, |
|
"logps/chosen": -0.685897946357727, |
|
"logps/rejected": -1294.4326171875, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2151576578617096, |
|
"rewards/margins": 12.627668380737305, |
|
"rewards/rejected": -12.412511825561523, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.40704207766262956, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 3.696774301496376e-06, |
|
"logits/chosen": -2.2252297401428223, |
|
"logits/rejected": -2.4217424392700195, |
|
"logps/chosen": -0.6748331785202026, |
|
"logps/rejected": -1261.10009765625, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21283042430877686, |
|
"rewards/margins": 12.33554458618164, |
|
"rewards/rejected": -12.122715950012207, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.4095392683231365, |
|
"grad_norm": 0.0283203125, |
|
"learning_rate": 3.677590594428629e-06, |
|
"logits/chosen": -2.159726619720459, |
|
"logits/rejected": -2.3402228355407715, |
|
"logps/chosen": -0.9869475364685059, |
|
"logps/rejected": -1201.0703125, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20935773849487305, |
|
"rewards/margins": 11.699995994567871, |
|
"rewards/rejected": -11.490636825561523, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4120364589836434, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 3.658317358553794e-06, |
|
"logits/chosen": -2.1311771869659424, |
|
"logits/rejected": -2.3283205032348633, |
|
"logps/chosen": -0.7873401045799255, |
|
"logps/rejected": -1318.947265625, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20913653075695038, |
|
"rewards/margins": 12.813528060913086, |
|
"rewards/rejected": -12.604392051696777, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.41453364964415035, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 3.638956059160252e-06, |
|
"logits/chosen": -2.180502414703369, |
|
"logits/rejected": -2.3862075805664062, |
|
"logps/chosen": -1.0054365396499634, |
|
"logps/rejected": -1342.7799072265625, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21474532783031464, |
|
"rewards/margins": 13.147130966186523, |
|
"rewards/rejected": -12.932388305664062, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.41703084030465726, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 3.6195081682315972e-06, |
|
"logits/chosen": -2.2029502391815186, |
|
"logits/rejected": -2.3754451274871826, |
|
"logps/chosen": -1.1696422100067139, |
|
"logps/rejected": -1324.997802734375, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20706875622272491, |
|
"rewards/margins": 12.972146987915039, |
|
"rewards/rejected": -12.765077590942383, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.41952803096516417, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 3.5999751643347342e-06, |
|
"logits/chosen": -2.126647472381592, |
|
"logits/rejected": -2.32842755317688, |
|
"logps/chosen": -1.3129024505615234, |
|
"logps/rejected": -1431.062255859375, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2164861261844635, |
|
"rewards/margins": 13.894182205200195, |
|
"rewards/rejected": -13.677694320678711, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.42202522162567113, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 3.5803585325074536e-06, |
|
"logits/chosen": -2.1573426723480225, |
|
"logits/rejected": -2.3461415767669678, |
|
"logps/chosen": -0.5849089622497559, |
|
"logps/rejected": -1369.0498046875, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20605847239494324, |
|
"rewards/margins": 13.385258674621582, |
|
"rewards/rejected": -13.179201126098633, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.42452241228617804, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 3.5606597641455387e-06, |
|
"logits/chosen": -2.201714515686035, |
|
"logits/rejected": -2.3846235275268555, |
|
"logps/chosen": -1.2365072965621948, |
|
"logps/rejected": -1268.500732421875, |
|
"loss": 0.2273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20618323981761932, |
|
"rewards/margins": 12.352148056030273, |
|
"rewards/rejected": -12.145965576171875, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.427019602946685, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 3.540880356889376e-06, |
|
"logits/chosen": -2.204244375228882, |
|
"logits/rejected": -2.37742280960083, |
|
"logps/chosen": -1.9021589756011963, |
|
"logps/rejected": -1228.02685546875, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1988053023815155, |
|
"rewards/margins": 11.881242752075195, |
|
"rewards/rejected": -11.682435035705566, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4295167936071919, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 3.5210218145100934e-06, |
|
"logits/chosen": -2.1249117851257324, |
|
"logits/rejected": -2.343653917312622, |
|
"logps/chosen": -0.9779669642448425, |
|
"logps/rejected": -1107.069580078125, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20971660315990448, |
|
"rewards/margins": 10.748934745788574, |
|
"rewards/rejected": -10.53921890258789, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4320139842676988, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 3.5010856467952335e-06, |
|
"logits/chosen": -2.135411262512207, |
|
"logits/rejected": -2.3283915519714355, |
|
"logps/chosen": -1.680784821510315, |
|
"logps/rejected": -1203.44873046875, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21119749546051025, |
|
"rewards/margins": 11.60279655456543, |
|
"rewards/rejected": -11.391599655151367, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4345111749282058, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 3.4810733694339687e-06, |
|
"logits/chosen": -2.227553367614746, |
|
"logits/rejected": -2.4453303813934326, |
|
"logps/chosen": -1.1945085525512695, |
|
"logps/rejected": -1365.62158203125, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21306195855140686, |
|
"rewards/margins": 13.283732414245605, |
|
"rewards/rejected": -13.070669174194336, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4370083655887127, |
|
"grad_norm": 0.026123046875, |
|
"learning_rate": 3.4609865039018676e-06, |
|
"logits/chosen": -2.2143800258636475, |
|
"logits/rejected": -2.38647198677063, |
|
"logps/chosen": -0.3982168138027191, |
|
"logps/rejected": -1256.0924072265625, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20522812008857727, |
|
"rewards/margins": 12.254903793334961, |
|
"rewards/rejected": -12.049676895141602, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.4395055562492196, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 3.4408265773452226e-06, |
|
"logits/chosen": -2.132845401763916, |
|
"logits/rejected": -2.32383394241333, |
|
"logps/chosen": -0.7928985953330994, |
|
"logps/rejected": -1260.4219970703125, |
|
"loss": 0.2281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21432673931121826, |
|
"rewards/margins": 12.288119316101074, |
|
"rewards/rejected": -12.07379150390625, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4420027469097266, |
|
"grad_norm": 0.027587890625, |
|
"learning_rate": 3.420595122464942e-06, |
|
"logits/chosen": -2.2310843467712402, |
|
"logits/rejected": -2.43049693107605, |
|
"logps/chosen": -1.0165212154388428, |
|
"logps/rejected": -1248.940673828125, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20808283984661102, |
|
"rewards/margins": 12.158212661743164, |
|
"rewards/rejected": -11.950130462646484, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4444999375702335, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 3.4002936774000284e-06, |
|
"logits/chosen": -2.129657030105591, |
|
"logits/rejected": -2.3626723289489746, |
|
"logps/chosen": -0.534063994884491, |
|
"logps/rejected": -1597.343017578125, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21577997505664825, |
|
"rewards/margins": 15.636571884155273, |
|
"rewards/rejected": -15.420791625976562, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4469971282307404, |
|
"grad_norm": 0.02587890625, |
|
"learning_rate": 3.3799237856106348e-06, |
|
"logits/chosen": -2.1293628215789795, |
|
"logits/rejected": -2.3366832733154297, |
|
"logps/chosen": -0.6109465956687927, |
|
"logps/rejected": -1318.239990234375, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2080315351486206, |
|
"rewards/margins": 12.848733901977539, |
|
"rewards/rejected": -12.640703201293945, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.44949431889124736, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 3.35948699576072e-06, |
|
"logits/chosen": -2.0792922973632812, |
|
"logits/rejected": -2.285391330718994, |
|
"logps/chosen": -0.9549906849861145, |
|
"logps/rejected": -1534.51953125, |
|
"loss": 0.2244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21579799056053162, |
|
"rewards/margins": 14.997169494628906, |
|
"rewards/rejected": -14.7813720703125, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.45199150955175427, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 3.3389848616003085e-06, |
|
"logits/chosen": -2.169448137283325, |
|
"logits/rejected": -2.34112286567688, |
|
"logps/chosen": -1.1561418771743774, |
|
"logps/rejected": -1331.464111328125, |
|
"loss": 0.2289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20999138057231903, |
|
"rewards/margins": 12.991872787475586, |
|
"rewards/rejected": -12.781880378723145, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.45448870021226123, |
|
"grad_norm": 0.024169921875, |
|
"learning_rate": 3.3184189418473674e-06, |
|
"logits/chosen": -2.0690829753875732, |
|
"logits/rejected": -2.2553791999816895, |
|
"logps/chosen": -0.737138032913208, |
|
"logps/rejected": -1278.2681884765625, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20780067145824432, |
|
"rewards/margins": 12.481771469116211, |
|
"rewards/rejected": -12.273969650268555, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.45698589087276814, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 3.2977908000692925e-06, |
|
"logits/chosen": -2.1408801078796387, |
|
"logits/rejected": -2.3243911266326904, |
|
"logps/chosen": -1.5268166065216064, |
|
"logps/rejected": -1405.22412109375, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20781424641609192, |
|
"rewards/margins": 13.740381240844727, |
|
"rewards/rejected": -13.532565116882324, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.45948308153327505, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 3.2771020045640435e-06, |
|
"logits/chosen": -2.286168336868286, |
|
"logits/rejected": -2.4684412479400635, |
|
"logps/chosen": -0.6708983182907104, |
|
"logps/rejected": -1134.7979736328125, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21890632808208466, |
|
"rewards/margins": 11.039240837097168, |
|
"rewards/rejected": -10.820335388183594, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.461980272193782, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 3.256354128240907e-06, |
|
"logits/chosen": -2.06745982170105, |
|
"logits/rejected": -2.248892307281494, |
|
"logps/chosen": -1.6344282627105713, |
|
"logps/rejected": -1263.974853515625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21318969130516052, |
|
"rewards/margins": 12.202125549316406, |
|
"rewards/rejected": -11.988935470581055, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4644774628542889, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 3.235548748500914e-06, |
|
"logits/chosen": -2.3071300983428955, |
|
"logits/rejected": -2.500091314315796, |
|
"logps/chosen": -1.0427045822143555, |
|
"logps/rejected": -1357.378662109375, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20457443594932556, |
|
"rewards/margins": 13.286227226257324, |
|
"rewards/rejected": -13.081652641296387, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.46697465351479583, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 3.214687447116913e-06, |
|
"logits/chosen": -2.10600209236145, |
|
"logits/rejected": -2.302873373031616, |
|
"logps/chosen": -0.6546305418014526, |
|
"logps/rejected": -1224.43359375, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20871946215629578, |
|
"rewards/margins": 11.745490074157715, |
|
"rewards/rejected": -11.536770820617676, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4694718441753028, |
|
"grad_norm": 0.01104736328125, |
|
"learning_rate": 3.193771810113313e-06, |
|
"logits/chosen": -2.1570992469787598, |
|
"logits/rejected": -2.384364604949951, |
|
"logps/chosen": -1.154052495956421, |
|
"logps/rejected": -1359.59619140625, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21566633880138397, |
|
"rewards/margins": 13.259126663208008, |
|
"rewards/rejected": -13.043458938598633, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4719690348358097, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 3.1728034276455032e-06, |
|
"logits/chosen": -2.138918399810791, |
|
"logits/rejected": -2.335463047027588, |
|
"logps/chosen": -0.595456600189209, |
|
"logps/rejected": -1286.499267578125, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21340537071228027, |
|
"rewards/margins": 12.50808048248291, |
|
"rewards/rejected": -12.294673919677734, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.4744662254963166, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 3.1517838938789597e-06, |
|
"logits/chosen": -2.1312789916992188, |
|
"logits/rejected": -2.3574845790863037, |
|
"logps/chosen": -1.0333608388900757, |
|
"logps/rejected": -1402.2928466796875, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21823573112487793, |
|
"rewards/margins": 13.426950454711914, |
|
"rewards/rejected": -13.208715438842773, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4769634161568236, |
|
"grad_norm": 0.021240234375, |
|
"learning_rate": 3.130714806868041e-06, |
|
"logits/chosen": -2.1018803119659424, |
|
"logits/rejected": -2.2899601459503174, |
|
"logps/chosen": -1.5672905445098877, |
|
"logps/rejected": -1282.0211181640625, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2161625623703003, |
|
"rewards/margins": 12.432838439941406, |
|
"rewards/rejected": -12.216676712036133, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4794606068173305, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 3.1095977684344976e-06, |
|
"logits/chosen": -2.1870434284210205, |
|
"logits/rejected": -2.3968632221221924, |
|
"logps/chosen": -0.9621860384941101, |
|
"logps/rejected": -1362.8802490234375, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21506325900554657, |
|
"rewards/margins": 13.313095092773438, |
|
"rewards/rejected": -13.098034858703613, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.48195779747783746, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 3.0884343840456874e-06, |
|
"logits/chosen": -2.2485427856445312, |
|
"logits/rejected": -2.4523234367370605, |
|
"logps/chosen": -0.8971269726753235, |
|
"logps/rejected": -1507.810791015625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20877547562122345, |
|
"rewards/margins": 14.738133430480957, |
|
"rewards/rejected": -14.529356002807617, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.48445498813834437, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 3.0672262626925174e-06, |
|
"logits/chosen": -2.148587942123413, |
|
"logits/rejected": -2.359325408935547, |
|
"logps/chosen": -2.250260353088379, |
|
"logps/rejected": -1421.3468017578125, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22058424353599548, |
|
"rewards/margins": 13.836527824401855, |
|
"rewards/rejected": -13.615945816040039, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4869521787988513, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 3.0459750167671147e-06, |
|
"logits/chosen": -2.1717689037323, |
|
"logits/rejected": -2.403097629547119, |
|
"logps/chosen": -1.1346304416656494, |
|
"logps/rejected": -1519.8033447265625, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21193809807300568, |
|
"rewards/margins": 14.729626655578613, |
|
"rewards/rejected": -14.517687797546387, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.48944936945935824, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 3.024682261940247e-06, |
|
"logits/chosen": -2.1400859355926514, |
|
"logits/rejected": -2.3196842670440674, |
|
"logps/chosen": -1.9256393909454346, |
|
"logps/rejected": -1212.4700927734375, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21558912098407745, |
|
"rewards/margins": 11.704329490661621, |
|
"rewards/rejected": -11.488740921020508, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.49194656011986515, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 3.0033496170384803e-06, |
|
"logits/chosen": -2.2003872394561768, |
|
"logits/rejected": -2.384770154953003, |
|
"logps/chosen": -0.6797516345977783, |
|
"logps/rejected": -1223.4056396484375, |
|
"loss": 0.2273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20655830204486847, |
|
"rewards/margins": 11.95020866394043, |
|
"rewards/rejected": -11.743650436401367, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.49444375078037206, |
|
"grad_norm": 0.05126953125, |
|
"learning_rate": 2.9819787039211068e-06, |
|
"logits/chosen": -2.1409530639648438, |
|
"logits/rejected": -2.3441128730773926, |
|
"logps/chosen": -1.6590759754180908, |
|
"logps/rejected": -1320.5748291015625, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21551513671875, |
|
"rewards/margins": 12.755599021911621, |
|
"rewards/rejected": -12.540084838867188, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.496940941440879, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 2.960571147356845e-06, |
|
"logits/chosen": -2.2252392768859863, |
|
"logits/rejected": -2.4482040405273438, |
|
"logps/chosen": -0.6751580238342285, |
|
"logps/rejected": -1514.2879638671875, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22100117802619934, |
|
"rewards/margins": 14.839349746704102, |
|
"rewards/rejected": -14.618349075317383, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.49943813210138593, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 2.9391285749003046e-06, |
|
"logits/chosen": -2.1313652992248535, |
|
"logits/rejected": -2.3276991844177246, |
|
"logps/chosen": -1.28163743019104, |
|
"logps/rejected": -1614.152099609375, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21389129757881165, |
|
"rewards/margins": 15.772817611694336, |
|
"rewards/rejected": -15.558927536010742, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49943813210138593, |
|
"eval_logits/chosen": -2.568960428237915, |
|
"eval_logits/rejected": -2.656001329421997, |
|
"eval_logps/chosen": -0.1526380479335785, |
|
"eval_logps/rejected": -643.470458984375, |
|
"eval_loss": 0.2215292751789093, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.25760865211486816, |
|
"eval_rewards/margins": 6.243593215942383, |
|
"eval_rewards/rejected": -5.985984802246094, |
|
"eval_runtime": 0.6593, |
|
"eval_samples_per_second": 7.584, |
|
"eval_steps_per_second": 4.551, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5019353227618929, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 2.9176526167682543e-06, |
|
"logits/chosen": -2.0913753509521484, |
|
"logits/rejected": -2.273857593536377, |
|
"logps/chosen": -0.7355623841285706, |
|
"logps/rejected": -1363.037841796875, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2061166763305664, |
|
"rewards/margins": 13.315282821655273, |
|
"rewards/rejected": -13.109164237976074, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5044325134223998, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 2.8961449057156775e-06, |
|
"logits/chosen": -2.1776702404022217, |
|
"logits/rejected": -2.3788368701934814, |
|
"logps/chosen": -1.159735918045044, |
|
"logps/rejected": -1370.439697265625, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21482279896736145, |
|
"rewards/margins": 13.327527046203613, |
|
"rewards/rejected": -13.112703323364258, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5069297040829067, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 2.874607076911642e-06, |
|
"logits/chosen": -2.1823270320892334, |
|
"logits/rejected": -2.400944471359253, |
|
"logps/chosen": -1.355530023574829, |
|
"logps/rejected": -1275.2886962890625, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2216695249080658, |
|
"rewards/margins": 12.436738967895508, |
|
"rewards/rejected": -12.215067863464355, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5094268947434136, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 2.8530407678149806e-06, |
|
"logits/chosen": -2.1733579635620117, |
|
"logits/rejected": -2.3787028789520264, |
|
"logps/chosen": -2.122178554534912, |
|
"logps/rejected": -1217.6248779296875, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21247439086437225, |
|
"rewards/margins": 11.738574028015137, |
|
"rewards/rejected": -11.526100158691406, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5119240854039205, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 2.8314476180498003e-06, |
|
"logits/chosen": -2.010568618774414, |
|
"logits/rejected": -2.1947145462036133, |
|
"logps/chosen": -0.8790448904037476, |
|
"logps/rejected": -1320.770263671875, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20843760669231415, |
|
"rewards/margins": 12.884744644165039, |
|
"rewards/rejected": -12.67630672454834, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5144212760644276, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 2.8098292692808253e-06, |
|
"logits/chosen": -2.1951942443847656, |
|
"logits/rejected": -2.3474528789520264, |
|
"logps/chosen": -0.8600829839706421, |
|
"logps/rejected": -1061.1048583984375, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20944657921791077, |
|
"rewards/margins": 10.36804485321045, |
|
"rewards/rejected": -10.158597946166992, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5169184667249345, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 2.7881873650885904e-06, |
|
"logits/chosen": -2.1963181495666504, |
|
"logits/rejected": -2.3679440021514893, |
|
"logps/chosen": -0.8357653617858887, |
|
"logps/rejected": -1268.226318359375, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21145395934581757, |
|
"rewards/margins": 12.395639419555664, |
|
"rewards/rejected": -12.184186935424805, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5194156573854414, |
|
"grad_norm": 0.03955078125, |
|
"learning_rate": 2.7665235508444772e-06, |
|
"logits/chosen": -2.131880044937134, |
|
"logits/rejected": -2.329930067062378, |
|
"logps/chosen": -0.8339768648147583, |
|
"logps/rejected": -1511.36962890625, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2037159651517868, |
|
"rewards/margins": 14.78296184539795, |
|
"rewards/rejected": -14.579244613647461, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5219128480459483, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 2.7448394735856275e-06, |
|
"logits/chosen": -2.0990092754364014, |
|
"logits/rejected": -2.317046642303467, |
|
"logps/chosen": -0.900246798992157, |
|
"logps/rejected": -1560.1123046875, |
|
"loss": 0.2273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20990662276744843, |
|
"rewards/margins": 15.233263969421387, |
|
"rewards/rejected": -15.023355484008789, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5244100387064552, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 2.723136781889722e-06, |
|
"logits/chosen": -2.221381664276123, |
|
"logits/rejected": -2.4073383808135986, |
|
"logps/chosen": -1.555213451385498, |
|
"logps/rejected": -1313.25439453125, |
|
"loss": 0.2281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21150963008403778, |
|
"rewards/margins": 12.850160598754883, |
|
"rewards/rejected": -12.638651847839355, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5269072293669622, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 2.7014171257496414e-06, |
|
"logits/chosen": -2.224299669265747, |
|
"logits/rejected": -2.4082083702087402, |
|
"logps/chosen": -1.5661276578903198, |
|
"logps/rejected": -1288.989013671875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2089545726776123, |
|
"rewards/margins": 12.412330627441406, |
|
"rewards/rejected": -12.203374862670898, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5294044200274691, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 2.6796821564480237e-06, |
|
"logits/chosen": -2.143993854522705, |
|
"logits/rejected": -2.3330225944519043, |
|
"logps/chosen": -1.3014509677886963, |
|
"logps/rejected": -1159.53271484375, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21552510559558868, |
|
"rewards/margins": 11.207192420959473, |
|
"rewards/rejected": -10.991667747497559, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.531901610687976, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 2.6579335264317253e-06, |
|
"logits/chosen": -2.2805047035217285, |
|
"logits/rejected": -2.4840075969696045, |
|
"logps/chosen": -0.6564453840255737, |
|
"logps/rejected": -1376.549560546875, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20875303447246552, |
|
"rewards/margins": 13.3289794921875, |
|
"rewards/rejected": -13.120226860046387, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5343988013484829, |
|
"grad_norm": 0.02587890625, |
|
"learning_rate": 2.6361728891861843e-06, |
|
"logits/chosen": -2.044534206390381, |
|
"logits/rejected": -2.263455629348755, |
|
"logps/chosen": -2.359926462173462, |
|
"logps/rejected": -1182.7542724609375, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21868690848350525, |
|
"rewards/margins": 11.231551170349121, |
|
"rewards/rejected": -11.01286506652832, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5368959920089899, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 2.614401899109716e-06, |
|
"logits/chosen": -2.2184996604919434, |
|
"logits/rejected": -2.4115943908691406, |
|
"logps/chosen": -0.7188009023666382, |
|
"logps/rejected": -1362.302490234375, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20849958062171936, |
|
"rewards/margins": 13.323092460632324, |
|
"rewards/rejected": -13.114593505859375, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5393931826694968, |
|
"grad_norm": 0.023681640625, |
|
"learning_rate": 2.5926222113877282e-06, |
|
"logits/chosen": -2.2279531955718994, |
|
"logits/rejected": -2.4470245838165283, |
|
"logps/chosen": -0.8932285308837891, |
|
"logps/rejected": -1380.791748046875, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20549210906028748, |
|
"rewards/margins": 13.206730842590332, |
|
"rewards/rejected": -13.001237869262695, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5418903733300038, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 2.570835481866889e-06, |
|
"logits/chosen": -2.122584819793701, |
|
"logits/rejected": -2.3029303550720215, |
|
"logps/chosen": -0.6316767334938049, |
|
"logps/rejected": -1331.388916015625, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20623505115509033, |
|
"rewards/margins": 13.001462936401367, |
|
"rewards/rejected": -12.795228958129883, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5443875639905107, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 2.5490433669292337e-06, |
|
"logits/chosen": -2.044675350189209, |
|
"logits/rejected": -2.251300811767578, |
|
"logps/chosen": -0.7981548309326172, |
|
"logps/rejected": -1485.2850341796875, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2164611518383026, |
|
"rewards/margins": 14.53178596496582, |
|
"rewards/rejected": -14.315322875976562, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5468847546510176, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 2.527247523366232e-06, |
|
"logits/chosen": -2.2029881477355957, |
|
"logits/rejected": -2.4012579917907715, |
|
"logps/chosen": -1.3100454807281494, |
|
"logps/rejected": -1426.16357421875, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2108200490474701, |
|
"rewards/margins": 13.932962417602539, |
|
"rewards/rejected": -13.722142219543457, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5493819453115245, |
|
"grad_norm": 0.02978515625, |
|
"learning_rate": 2.5054496082528336e-06, |
|
"logits/chosen": -2.263662576675415, |
|
"logits/rejected": -2.4767444133758545, |
|
"logps/chosen": -0.6738319993019104, |
|
"logps/rejected": -1380.506103515625, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21471872925758362, |
|
"rewards/margins": 13.528160095214844, |
|
"rewards/rejected": -13.313441276550293, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5518791359720314, |
|
"grad_norm": 0.03955078125, |
|
"learning_rate": 2.483651278821481e-06, |
|
"logits/chosen": -2.2110023498535156, |
|
"logits/rejected": -2.4015591144561768, |
|
"logps/chosen": -1.228434443473816, |
|
"logps/rejected": -1266.2230224609375, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20852570235729218, |
|
"rewards/margins": 12.340888977050781, |
|
"rewards/rejected": -12.13236141204834, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5543763266325384, |
|
"grad_norm": 0.0238037109375, |
|
"learning_rate": 2.4618541923361166e-06, |
|
"logits/chosen": -2.3842873573303223, |
|
"logits/rejected": -2.558562994003296, |
|
"logps/chosen": -1.321533203125, |
|
"logps/rejected": -1156.223876953125, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20611576735973358, |
|
"rewards/margins": 11.165016174316406, |
|
"rewards/rejected": -10.958898544311523, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5568735172930454, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 2.4400600059661836e-06, |
|
"logits/chosen": -2.069483757019043, |
|
"logits/rejected": -2.31620717048645, |
|
"logps/chosen": -1.093656301498413, |
|
"logps/rejected": -1508.9503173828125, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2100195437669754, |
|
"rewards/margins": 14.735700607299805, |
|
"rewards/rejected": -14.52568244934082, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5593707079535523, |
|
"grad_norm": 0.009765625, |
|
"learning_rate": 2.41827037666064e-06, |
|
"logits/chosen": -2.2314319610595703, |
|
"logits/rejected": -2.4116859436035156, |
|
"logps/chosen": -0.6631449460983276, |
|
"logps/rejected": -1216.8101806640625, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2116110622882843, |
|
"rewards/margins": 11.8889799118042, |
|
"rewards/rejected": -11.677370071411133, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5618678986140592, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 2.396486961021983e-06, |
|
"logits/chosen": -2.156050443649292, |
|
"logits/rejected": -2.355743885040283, |
|
"logps/chosen": -0.5853773951530457, |
|
"logps/rejected": -1307.397705078125, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21641604602336884, |
|
"rewards/margins": 12.776580810546875, |
|
"rewards/rejected": -12.560165405273438, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5643650892745661, |
|
"grad_norm": 0.0247802734375, |
|
"learning_rate": 2.3747114151802993e-06, |
|
"logits/chosen": -2.2995388507843018, |
|
"logits/rejected": -2.4979677200317383, |
|
"logps/chosen": -1.0234979391098022, |
|
"logps/rejected": -1314.0380859375, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2088872194290161, |
|
"rewards/margins": 12.864030838012695, |
|
"rewards/rejected": -12.655143737792969, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.566862279935073, |
|
"grad_norm": 0.04345703125, |
|
"learning_rate": 2.352945394667363e-06, |
|
"logits/chosen": -2.087890386581421, |
|
"logits/rejected": -2.308422803878784, |
|
"logps/chosen": -0.9035698771476746, |
|
"logps/rejected": -1510.1090087890625, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2111314833164215, |
|
"rewards/margins": 14.675390243530273, |
|
"rewards/rejected": -14.464259147644043, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.56935947059558, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 2.3311905542907627e-06, |
|
"logits/chosen": -2.234039545059204, |
|
"logits/rejected": -2.428889751434326, |
|
"logps/chosen": -0.797686755657196, |
|
"logps/rejected": -1220.7269287109375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2120717316865921, |
|
"rewards/margins": 11.918030738830566, |
|
"rewards/rejected": -11.70595932006836, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5718566612560869, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 2.30944854800809e-06, |
|
"logits/chosen": -2.1873550415039062, |
|
"logits/rejected": -2.3636820316314697, |
|
"logps/chosen": -0.8641906976699829, |
|
"logps/rejected": -1375.240478515625, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21045894920825958, |
|
"rewards/margins": 13.458274841308594, |
|
"rewards/rejected": -13.247815132141113, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5743538519165938, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 2.287721028801204e-06, |
|
"logits/chosen": -2.147500991821289, |
|
"logits/rejected": -2.3285794258117676, |
|
"logps/chosen": -1.5540382862091064, |
|
"logps/rejected": -1261.9169921875, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2090359479188919, |
|
"rewards/margins": 12.258954048156738, |
|
"rewards/rejected": -12.049917221069336, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5768510425771007, |
|
"grad_norm": 0.224609375, |
|
"learning_rate": 2.26600964855055e-06, |
|
"logits/chosen": -2.2112767696380615, |
|
"logits/rejected": -2.387683868408203, |
|
"logps/chosen": -1.0878078937530518, |
|
"logps/rejected": -1259.334716796875, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2047530710697174, |
|
"rewards/margins": 12.326273918151855, |
|
"rewards/rejected": -12.121520042419434, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5793482332376076, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 2.244316057909573e-06, |
|
"logits/chosen": -2.179072856903076, |
|
"logits/rejected": -2.3518600463867188, |
|
"logps/chosen": -0.5903832912445068, |
|
"logps/rejected": -1252.9005126953125, |
|
"loss": 0.2288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20970389246940613, |
|
"rewards/margins": 12.249414443969727, |
|
"rewards/rejected": -12.039710998535156, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5818454238981147, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 2.2226419061792282e-06, |
|
"logits/chosen": -2.2571616172790527, |
|
"logits/rejected": -2.4548702239990234, |
|
"logps/chosen": -0.747587263584137, |
|
"logps/rejected": -1403.0311279296875, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20638947188854218, |
|
"rewards/margins": 13.71589183807373, |
|
"rewards/rejected": -13.509503364562988, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5843426145586216, |
|
"grad_norm": 0.0079345703125, |
|
"learning_rate": 2.200988841182589e-06, |
|
"logits/chosen": -2.1915557384490967, |
|
"logits/rejected": -2.3925371170043945, |
|
"logps/chosen": -0.653125524520874, |
|
"logps/rejected": -1481.6878662109375, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20729561150074005, |
|
"rewards/margins": 14.506765365600586, |
|
"rewards/rejected": -14.299470901489258, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5868398052191285, |
|
"grad_norm": 0.0286865234375, |
|
"learning_rate": 2.179358509139559e-06, |
|
"logits/chosen": -2.149214267730713, |
|
"logits/rejected": -2.344883680343628, |
|
"logps/chosen": -2.6051526069641113, |
|
"logps/rejected": -1142.56201171875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21485964953899384, |
|
"rewards/margins": 11.022435188293457, |
|
"rewards/rejected": -10.807573318481445, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5893369958796354, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 2.1577525545417254e-06, |
|
"logits/chosen": -2.1596992015838623, |
|
"logits/rejected": -2.3585286140441895, |
|
"logps/chosen": -0.6524207592010498, |
|
"logps/rejected": -1219.5198974609375, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2145775854587555, |
|
"rewards/margins": 11.869647026062012, |
|
"rewards/rejected": -11.655069351196289, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5918341865401423, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 2.1361726200273293e-06, |
|
"logits/chosen": -2.247102737426758, |
|
"logits/rejected": -2.4553802013397217, |
|
"logps/chosen": -1.189576506614685, |
|
"logps/rejected": -1349.142578125, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21253642439842224, |
|
"rewards/margins": 13.139638900756836, |
|
"rewards/rejected": -12.927103042602539, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5943313772006493, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 2.1146203462563773e-06, |
|
"logits/chosen": -2.302658796310425, |
|
"logits/rejected": -2.4925646781921387, |
|
"logps/chosen": -0.5675852298736572, |
|
"logps/rejected": -1279.3642578125, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20174381136894226, |
|
"rewards/margins": 12.505022048950195, |
|
"rewards/rejected": -12.303278923034668, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5968285678611562, |
|
"grad_norm": 0.024169921875, |
|
"learning_rate": 2.0930973717859117e-06, |
|
"logits/chosen": -2.3194613456726074, |
|
"logits/rejected": -2.526947498321533, |
|
"logps/chosen": -0.6186977624893188, |
|
"logps/rejected": -1298.871826171875, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21502625942230225, |
|
"rewards/margins": 12.666671752929688, |
|
"rewards/rejected": -12.45164680480957, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5993257585216631, |
|
"grad_norm": 0.0150146484375, |
|
"learning_rate": 2.0716053329454337e-06, |
|
"logits/chosen": -2.0586659908294678, |
|
"logits/rejected": -2.262817621231079, |
|
"logps/chosen": -1.2787067890167236, |
|
"logps/rejected": -1338.0716552734375, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21538551151752472, |
|
"rewards/margins": 12.998420715332031, |
|
"rewards/rejected": -12.783034324645996, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.60182294918217, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 2.0501458637124963e-06, |
|
"logits/chosen": -2.1946122646331787, |
|
"logits/rejected": -2.4308152198791504, |
|
"logps/chosen": -0.9974037408828735, |
|
"logps/rejected": -1574.956787109375, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21448758244514465, |
|
"rewards/margins": 15.443408012390137, |
|
"rewards/rejected": -15.228919982910156, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.604320139842677, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 2.0287205955884812e-06, |
|
"logits/chosen": -2.1859405040740967, |
|
"logits/rejected": -2.419334888458252, |
|
"logps/chosen": -1.4137351512908936, |
|
"logps/rejected": -1243.725830078125, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21700558066368103, |
|
"rewards/margins": 11.884050369262695, |
|
"rewards/rejected": -11.667045593261719, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6068173305031839, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 2.0073311574745583e-06, |
|
"logits/chosen": -2.162872791290283, |
|
"logits/rejected": -2.378561019897461, |
|
"logps/chosen": -0.7903895378112793, |
|
"logps/rejected": -1427.887939453125, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21638064086437225, |
|
"rewards/margins": 13.9492769241333, |
|
"rewards/rejected": -13.73289680480957, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6093145211636909, |
|
"grad_norm": 0.037841796875, |
|
"learning_rate": 1.9859791755478453e-06, |
|
"logits/chosen": -2.1776349544525146, |
|
"logits/rejected": -2.3626227378845215, |
|
"logps/chosen": -1.0283732414245605, |
|
"logps/rejected": -1148.4774169921875, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21198368072509766, |
|
"rewards/margins": 11.212942123413086, |
|
"rewards/rejected": -11.000959396362305, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6118117118241978, |
|
"grad_norm": 0.0311279296875, |
|
"learning_rate": 1.9646662731377737e-06, |
|
"logits/chosen": -2.130434989929199, |
|
"logits/rejected": -2.3274593353271484, |
|
"logps/chosen": -0.7933204770088196, |
|
"logps/rejected": -1231.31201171875, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20909221470355988, |
|
"rewards/margins": 11.990662574768066, |
|
"rewards/rejected": -11.781569480895996, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6143089024847047, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 1.9433940706026743e-06, |
|
"logits/chosen": -2.162235736846924, |
|
"logits/rejected": -2.3636813163757324, |
|
"logps/chosen": -0.8596396446228027, |
|
"logps/rejected": -1512.643798828125, |
|
"loss": 0.2267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21136781573295593, |
|
"rewards/margins": 14.79273796081543, |
|
"rewards/rejected": -14.581372261047363, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6168060931452116, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 1.9221641852065807e-06, |
|
"logits/chosen": -2.153958797454834, |
|
"logits/rejected": -2.322754383087158, |
|
"logps/chosen": -0.7868290543556213, |
|
"logps/rejected": -1277.087890625, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21162231266498566, |
|
"rewards/margins": 12.481648445129395, |
|
"rewards/rejected": -12.270025253295898, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6193032838057185, |
|
"grad_norm": 0.0302734375, |
|
"learning_rate": 1.9009782309962805e-06, |
|
"logits/chosen": -2.2541210651397705, |
|
"logits/rejected": -2.451572895050049, |
|
"logps/chosen": -0.9773980379104614, |
|
"logps/rejected": -1259.029296875, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21588608622550964, |
|
"rewards/margins": 12.217048645019531, |
|
"rewards/rejected": -12.001164436340332, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6218004744662256, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 1.8798378186785979e-06, |
|
"logits/chosen": -2.208289623260498, |
|
"logits/rejected": -2.3975791931152344, |
|
"logps/chosen": -0.47841542959213257, |
|
"logps/rejected": -1317.6165771484375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21394380927085876, |
|
"rewards/margins": 12.884051322937012, |
|
"rewards/rejected": -12.670106887817383, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6242976651267325, |
|
"grad_norm": 0.0172119140625, |
|
"learning_rate": 1.8587445554979404e-06, |
|
"logits/chosen": -2.054529905319214, |
|
"logits/rejected": -2.2491745948791504, |
|
"logps/chosen": -0.9916723370552063, |
|
"logps/rejected": -1467.903076171875, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21281161904335022, |
|
"rewards/margins": 14.350473403930664, |
|
"rewards/rejected": -14.137661933898926, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6267948557872394, |
|
"grad_norm": 0.04052734375, |
|
"learning_rate": 1.8377000451141013e-06, |
|
"logits/chosen": -2.1033387184143066, |
|
"logits/rejected": -2.311828136444092, |
|
"logps/chosen": -1.013270616531372, |
|
"logps/rejected": -1430.32568359375, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2092626839876175, |
|
"rewards/margins": 13.935432434082031, |
|
"rewards/rejected": -13.726168632507324, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6292920464477463, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 1.8167058874803405e-06, |
|
"logits/chosen": -2.2198266983032227, |
|
"logits/rejected": -2.435263156890869, |
|
"logps/chosen": -1.5374799966812134, |
|
"logps/rejected": -1410.561279296875, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21642649173736572, |
|
"rewards/margins": 13.645418167114258, |
|
"rewards/rejected": -13.428991317749023, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6317892371082532, |
|
"grad_norm": 0.025390625, |
|
"learning_rate": 1.7957636787217451e-06, |
|
"logits/chosen": -2.1474337577819824, |
|
"logits/rejected": -2.3489108085632324, |
|
"logps/chosen": -0.525337278842926, |
|
"logps/rejected": -1465.3909912109375, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21127943694591522, |
|
"rewards/margins": 14.354647636413574, |
|
"rewards/rejected": -14.1433687210083, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6342864277687601, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 1.7748750110138768e-06, |
|
"logits/chosen": -2.1010701656341553, |
|
"logits/rejected": -2.3061635494232178, |
|
"logps/chosen": -1.495689034461975, |
|
"logps/rejected": -1522.001953125, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21690087020397186, |
|
"rewards/margins": 14.7809476852417, |
|
"rewards/rejected": -14.564045906066895, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6367836184292671, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 1.7540414724617282e-06, |
|
"logits/chosen": -2.0483648777008057, |
|
"logits/rejected": -2.2502453327178955, |
|
"logps/chosen": -1.7171008586883545, |
|
"logps/rejected": -1322.4296875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21715514361858368, |
|
"rewards/margins": 12.821990966796875, |
|
"rewards/rejected": -12.604835510253906, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.639280809089774, |
|
"grad_norm": 0.0155029296875, |
|
"learning_rate": 1.7332646469789827e-06, |
|
"logits/chosen": -2.2271251678466797, |
|
"logits/rejected": -2.4021248817443848, |
|
"logps/chosen": -0.7044438719749451, |
|
"logps/rejected": -1151.026611328125, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21061739325523376, |
|
"rewards/margins": 11.253252983093262, |
|
"rewards/rejected": -11.042635917663574, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6417779997502809, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 1.7125461141675881e-06, |
|
"logits/chosen": -2.115159034729004, |
|
"logits/rejected": -2.321096181869507, |
|
"logps/chosen": -1.5179011821746826, |
|
"logps/rejected": -1341.9727783203125, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21012921631336212, |
|
"rewards/margins": 13.043965339660645, |
|
"rewards/rejected": -12.833836555480957, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6442751904107878, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 1.6918874491976744e-06, |
|
"logits/chosen": -2.262359619140625, |
|
"logits/rejected": -2.4549667835235596, |
|
"logps/chosen": -1.1417173147201538, |
|
"logps/rejected": -1349.908203125, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21297034621238708, |
|
"rewards/margins": 13.129191398620605, |
|
"rewards/rejected": -12.916219711303711, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6467723810712948, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 1.6712902226877917e-06, |
|
"logits/chosen": -2.1325788497924805, |
|
"logits/rejected": -2.323542356491089, |
|
"logps/chosen": -1.002483606338501, |
|
"logps/rejected": -1407.6922607421875, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21138958632946014, |
|
"rewards/margins": 13.752557754516602, |
|
"rewards/rejected": -13.541168212890625, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6492695717318018, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 1.6507560005854977e-06, |
|
"logits/chosen": -2.0466830730438232, |
|
"logits/rejected": -2.254211664199829, |
|
"logps/chosen": -1.2699908018112183, |
|
"logps/rejected": -1284.965576171875, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22062024474143982, |
|
"rewards/margins": 12.382702827453613, |
|
"rewards/rejected": -12.16208267211914, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6517667623923087, |
|
"grad_norm": 0.0283203125, |
|
"learning_rate": 1.6302863440483121e-06, |
|
"logits/chosen": -2.102281093597412, |
|
"logits/rejected": -2.344468832015991, |
|
"logps/chosen": -0.9672495722770691, |
|
"logps/rejected": -1371.63232421875, |
|
"loss": 0.2237, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2286236733198166, |
|
"rewards/margins": 13.317922592163086, |
|
"rewards/rejected": -13.089300155639648, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6542639530528156, |
|
"grad_norm": 0.10302734375, |
|
"learning_rate": 1.6098828093250203e-06, |
|
"logits/chosen": -2.012927770614624, |
|
"logits/rejected": -2.23055100440979, |
|
"logps/chosen": -2.223574161529541, |
|
"logps/rejected": -1439.184326171875, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2114640474319458, |
|
"rewards/margins": 13.80018424987793, |
|
"rewards/rejected": -13.588720321655273, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6567611437133225, |
|
"grad_norm": 0.0201416015625, |
|
"learning_rate": 1.5895469476373545e-06, |
|
"logits/chosen": -2.0998306274414062, |
|
"logits/rejected": -2.284853935241699, |
|
"logps/chosen": -1.0365889072418213, |
|
"logps/rejected": -1287.3863525390625, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21419724822044373, |
|
"rewards/margins": 12.467586517333984, |
|
"rewards/rejected": -12.253389358520508, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6592583343738294, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 1.5692803050620642e-06, |
|
"logits/chosen": -2.1266770362854004, |
|
"logits/rejected": -2.341489553451538, |
|
"logps/chosen": -1.9875209331512451, |
|
"logps/rejected": -1219.6407470703125, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21611304581165314, |
|
"rewards/margins": 11.721773147583008, |
|
"rewards/rejected": -11.505661010742188, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6617555250343363, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 1.5490844224133717e-06, |
|
"logits/chosen": -2.178802251815796, |
|
"logits/rejected": -2.3850629329681396, |
|
"logps/chosen": -1.1978418827056885, |
|
"logps/rejected": -1456.7591552734375, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2064187228679657, |
|
"rewards/margins": 14.242889404296875, |
|
"rewards/rejected": -14.036470413208008, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6642527156948433, |
|
"grad_norm": 0.02978515625, |
|
"learning_rate": 1.528960835125822e-06, |
|
"logits/chosen": -2.3235323429107666, |
|
"logits/rejected": -2.508779525756836, |
|
"logps/chosen": -0.7140904664993286, |
|
"logps/rejected": -1262.5396728515625, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2111320048570633, |
|
"rewards/margins": 12.3548002243042, |
|
"rewards/rejected": -12.143668174743652, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6667499063553503, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 1.5089110731375568e-06, |
|
"logits/chosen": -2.1535146236419678, |
|
"logits/rejected": -2.346010446548462, |
|
"logps/chosen": -1.2154910564422607, |
|
"logps/rejected": -1353.01416015625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21193823218345642, |
|
"rewards/margins": 13.174649238586426, |
|
"rewards/rejected": -12.962712287902832, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6692470970158572, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 1.4889366607739925e-06, |
|
"logits/chosen": -2.2847390174865723, |
|
"logits/rejected": -2.437983989715576, |
|
"logps/chosen": -0.47022026777267456, |
|
"logps/rejected": -1079.610595703125, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20733828842639923, |
|
"rewards/margins": 10.533978462219238, |
|
"rewards/rejected": -10.326639175415039, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6717442876763641, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 1.4690391166319307e-06, |
|
"logits/chosen": -2.091798782348633, |
|
"logits/rejected": -2.286367177963257, |
|
"logps/chosen": -0.8848400115966797, |
|
"logps/rejected": -1370.623046875, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21229073405265808, |
|
"rewards/margins": 13.300837516784668, |
|
"rewards/rejected": -13.088546752929688, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.674241478336871, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 1.4492199534641055e-06, |
|
"logits/chosen": -2.1903884410858154, |
|
"logits/rejected": -2.389869451522827, |
|
"logps/chosen": -0.7620021104812622, |
|
"logps/rejected": -1357.733642578125, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2085207998752594, |
|
"rewards/margins": 13.300318717956543, |
|
"rewards/rejected": -13.091796875, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.676738668997378, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 1.429480678064174e-06, |
|
"logits/chosen": -2.1907572746276855, |
|
"logits/rejected": -2.4412574768066406, |
|
"logps/chosen": -1.4903779029846191, |
|
"logps/rejected": -1532.8353271484375, |
|
"loss": 0.2235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22088858485221863, |
|
"rewards/margins": 14.948209762573242, |
|
"rewards/rejected": -14.727320671081543, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6792358596578849, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 1.4098227911521523e-06, |
|
"logits/chosen": -2.1927340030670166, |
|
"logits/rejected": -2.384458065032959, |
|
"logps/chosen": -1.0519030094146729, |
|
"logps/rejected": -1408.5384521484375, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21749384701251984, |
|
"rewards/margins": 13.769442558288574, |
|
"rewards/rejected": -13.551948547363281, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6817330503183918, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 1.3902477872603295e-06, |
|
"logits/chosen": -2.292635440826416, |
|
"logits/rejected": -2.4606173038482666, |
|
"logps/chosen": -1.3724099397659302, |
|
"logps/rejected": -1059.629638671875, |
|
"loss": 0.2292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2094014585018158, |
|
"rewards/margins": 10.156023025512695, |
|
"rewards/rejected": -9.946621894836426, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6842302409788987, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 1.370757154619638e-06, |
|
"logits/chosen": -2.2135720252990723, |
|
"logits/rejected": -2.4035539627075195, |
|
"logps/chosen": -0.8492560386657715, |
|
"logps/rejected": -1440.1517333984375, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21952596306800842, |
|
"rewards/margins": 13.991949081420898, |
|
"rewards/rejected": -13.772422790527344, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6867274316394056, |
|
"grad_norm": 0.0546875, |
|
"learning_rate": 1.3513523750465049e-06, |
|
"logits/chosen": -2.2055509090423584, |
|
"logits/rejected": -2.3952600955963135, |
|
"logps/chosen": -0.848610520362854, |
|
"logps/rejected": -1253.37841796875, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2106543481349945, |
|
"rewards/margins": 12.153672218322754, |
|
"rewards/rejected": -11.94301700592041, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6892246222999125, |
|
"grad_norm": 0.0078125, |
|
"learning_rate": 1.332034923830199e-06, |
|
"logits/chosen": -2.1199612617492676, |
|
"logits/rejected": -2.3331620693206787, |
|
"logps/chosen": -0.572918176651001, |
|
"logps/rejected": -1314.5574951171875, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21192285418510437, |
|
"rewards/margins": 12.847297668457031, |
|
"rewards/rejected": -12.635372161865234, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6917218129604196, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 1.31280626962067e-06, |
|
"logits/chosen": -2.242522716522217, |
|
"logits/rejected": -2.4255213737487793, |
|
"logps/chosen": -0.6031197905540466, |
|
"logps/rejected": -1176.1724853515625, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21441105008125305, |
|
"rewards/margins": 11.398508071899414, |
|
"rewards/rejected": -11.18409538269043, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6942190036209265, |
|
"grad_norm": 0.03759765625, |
|
"learning_rate": 1.2936678743168813e-06, |
|
"logits/chosen": -2.1787726879119873, |
|
"logits/rejected": -2.379664659500122, |
|
"logps/chosen": -0.6903184652328491, |
|
"logps/rejected": -1316.2584228515625, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2146139144897461, |
|
"rewards/margins": 12.865110397338867, |
|
"rewards/rejected": -12.650495529174805, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6967161942814334, |
|
"grad_norm": 0.033935546875, |
|
"learning_rate": 1.2746211929556777e-06, |
|
"logits/chosen": -2.1566481590270996, |
|
"logits/rejected": -2.4140141010284424, |
|
"logps/chosen": -0.8048852682113647, |
|
"logps/rejected": -1725.7884521484375, |
|
"loss": 0.2244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21056988835334778, |
|
"rewards/margins": 16.911666870117188, |
|
"rewards/rejected": -16.70109748840332, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6992133849419403, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 1.2556676736011558e-06, |
|
"logits/chosen": -2.1705546379089355, |
|
"logits/rejected": -2.36136531829834, |
|
"logps/chosen": -1.7305570840835571, |
|
"logps/rejected": -1468.9334716796875, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21282191574573517, |
|
"rewards/margins": 14.295863151550293, |
|
"rewards/rejected": -14.083041191101074, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7017105756024472, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 1.2368087572345772e-06, |
|
"logits/chosen": -2.2008700370788574, |
|
"logits/rejected": -2.3622145652770996, |
|
"logps/chosen": -0.9749493598937988, |
|
"logps/rejected": -1153.006103515625, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20674769580364227, |
|
"rewards/margins": 11.259106636047363, |
|
"rewards/rejected": -11.052358627319336, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7042077662629542, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 1.2180458776448067e-06, |
|
"logits/chosen": -2.183065891265869, |
|
"logits/rejected": -2.4031364917755127, |
|
"logps/chosen": -1.3278162479400635, |
|
"logps/rejected": -1352.5931396484375, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21398906409740448, |
|
"rewards/margins": 13.097040176391602, |
|
"rewards/rejected": -12.883050918579102, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7067049569234611, |
|
"grad_norm": 0.04150390625, |
|
"learning_rate": 1.1993804613193158e-06, |
|
"logits/chosen": -2.166015625, |
|
"logits/rejected": -2.376171827316284, |
|
"logps/chosen": -0.8504392504692078, |
|
"logps/rejected": -1218.2906494140625, |
|
"loss": 0.2242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2224057912826538, |
|
"rewards/margins": 11.764514923095703, |
|
"rewards/rejected": -11.542108535766602, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.709202147583968, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 1.1808139273357232e-06, |
|
"logits/chosen": -2.1249091625213623, |
|
"logits/rejected": -2.324924945831299, |
|
"logps/chosen": -1.2602803707122803, |
|
"logps/rejected": -1440.6927490234375, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21478672325611115, |
|
"rewards/margins": 13.95526123046875, |
|
"rewards/rejected": -13.740473747253418, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.711699338244475, |
|
"grad_norm": 0.0186767578125, |
|
"learning_rate": 1.1623476872539108e-06, |
|
"logits/chosen": -2.1342732906341553, |
|
"logits/rejected": -2.3520121574401855, |
|
"logps/chosen": -1.0939338207244873, |
|
"logps/rejected": -1569.9661865234375, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.209732323884964, |
|
"rewards/margins": 15.359413146972656, |
|
"rewards/rejected": -15.149681091308594, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7141965289049819, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 1.1439831450087032e-06, |
|
"logits/chosen": -2.1833555698394775, |
|
"logits/rejected": -2.408240795135498, |
|
"logps/chosen": -1.4031983613967896, |
|
"logps/rejected": -1495.2554931640625, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20904704928398132, |
|
"rewards/margins": 14.592279434204102, |
|
"rewards/rejected": -14.383232116699219, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7166937195654888, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 1.1257216968031357e-06, |
|
"logits/chosen": -2.1499791145324707, |
|
"logits/rejected": -2.3467013835906982, |
|
"logps/chosen": -0.6740778684616089, |
|
"logps/rejected": -1315.198486328125, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2084966003894806, |
|
"rewards/margins": 12.877813339233398, |
|
"rewards/rejected": -12.669316291809082, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7191909102259958, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 1.1075647310022974e-06, |
|
"logits/chosen": -2.293015956878662, |
|
"logits/rejected": -2.477437973022461, |
|
"logps/chosen": -0.6577932238578796, |
|
"logps/rejected": -1144.9639892578125, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21200187504291534, |
|
"rewards/margins": 11.185698509216309, |
|
"rewards/rejected": -10.973695755004883, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7216881008865027, |
|
"grad_norm": 0.0341796875, |
|
"learning_rate": 1.0895136280277863e-06, |
|
"logits/chosen": -2.1305599212646484, |
|
"logits/rejected": -2.3395919799804688, |
|
"logps/chosen": -0.9710084795951843, |
|
"logps/rejected": -1521.902099609375, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21521084010601044, |
|
"rewards/margins": 14.770858764648438, |
|
"rewards/rejected": -14.555648803710938, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7241852915470096, |
|
"grad_norm": 0.02685546875, |
|
"learning_rate": 1.0715697602527542e-06, |
|
"logits/chosen": -1.9920504093170166, |
|
"logits/rejected": -2.2198596000671387, |
|
"logps/chosen": -0.49225324392318726, |
|
"logps/rejected": -1440.2822265625, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21176087856292725, |
|
"rewards/margins": 13.888765335083008, |
|
"rewards/rejected": -13.67700481414795, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7266824822075165, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 1.0537344918975708e-06, |
|
"logits/chosen": -2.1923391819000244, |
|
"logits/rejected": -2.3587305545806885, |
|
"logps/chosen": -2.3005270957946777, |
|
"logps/rejected": -1118.677490234375, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22067633271217346, |
|
"rewards/margins": 10.68152141571045, |
|
"rewards/rejected": -10.460844993591309, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7291796728680234, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 1.036009178926107e-06, |
|
"logits/chosen": -2.162017822265625, |
|
"logits/rejected": -2.350229263305664, |
|
"logps/chosen": -0.4403456151485443, |
|
"logps/rejected": -1365.908203125, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21478745341300964, |
|
"rewards/margins": 13.359176635742188, |
|
"rewards/rejected": -13.144391059875488, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7316768635285305, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 1.0183951689426438e-06, |
|
"logits/chosen": -2.0874218940734863, |
|
"logits/rejected": -2.286980152130127, |
|
"logps/chosen": -1.1334517002105713, |
|
"logps/rejected": -1574.8843994140625, |
|
"loss": 0.2267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20781561732292175, |
|
"rewards/margins": 15.404953002929688, |
|
"rewards/rejected": -15.197137832641602, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7341740541890374, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 1.0008938010894156e-06, |
|
"logits/chosen": -2.05769419670105, |
|
"logits/rejected": -2.291485548019409, |
|
"logps/chosen": -0.6213763356208801, |
|
"logps/rejected": -1545.57421875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21286337077617645, |
|
"rewards/margins": 15.127001762390137, |
|
"rewards/rejected": -14.914140701293945, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7366712448495443, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.83506405944804e-07, |
|
"logits/chosen": -2.0132200717926025, |
|
"logits/rejected": -2.2228617668151855, |
|
"logps/chosen": -1.0132176876068115, |
|
"logps/rejected": -1225.736572265625, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21595139801502228, |
|
"rewards/margins": 11.760801315307617, |
|
"rewards/rejected": -11.544851303100586, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7391684355100512, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 9.662343054221743e-07, |
|
"logits/chosen": -2.038722515106201, |
|
"logits/rejected": -2.254706621170044, |
|
"logps/chosen": -1.0080900192260742, |
|
"logps/rejected": -1486.7254638671875, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21779987215995789, |
|
"rewards/margins": 14.319659233093262, |
|
"rewards/rejected": -14.101860046386719, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7416656261705581, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 9.490788126693754e-07, |
|
"logits/chosen": -2.05572247505188, |
|
"logits/rejected": -2.270496129989624, |
|
"logps/chosen": -1.580960988998413, |
|
"logps/rejected": -1349.623779296875, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21096165478229523, |
|
"rewards/margins": 13.037325859069824, |
|
"rewards/rejected": -12.826364517211914, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.744162816831065, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 9.32041231968904e-07, |
|
"logits/chosen": -2.135493040084839, |
|
"logits/rejected": -2.3431620597839355, |
|
"logps/chosen": -0.692672848701477, |
|
"logps/rejected": -1422.2606201171875, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21278159320354462, |
|
"rewards/margins": 13.839566230773926, |
|
"rewards/rejected": -13.626785278320312, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.746660007491572, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 9.151228586387464e-07, |
|
"logits/chosen": -2.1877083778381348, |
|
"logits/rejected": -2.3766164779663086, |
|
"logps/chosen": -0.7439475655555725, |
|
"logps/rejected": -1241.116943359375, |
|
"loss": 0.2292, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2107519656419754, |
|
"rewards/margins": 12.074844360351562, |
|
"rewards/rejected": -11.864092826843262, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7491571981520789, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 8.983249789338941e-07, |
|
"logits/chosen": -2.150568723678589, |
|
"logits/rejected": -2.329155445098877, |
|
"logps/chosen": -0.8139511346817017, |
|
"logps/rejected": -1264.535888671875, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20758719742298126, |
|
"rewards/margins": 12.365687370300293, |
|
"rewards/rejected": -12.158101081848145, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7491571981520789, |
|
"eval_logits/chosen": -2.5715415477752686, |
|
"eval_logits/rejected": -2.65895676612854, |
|
"eval_logps/chosen": -0.12666501104831696, |
|
"eval_logps/rejected": -650.5204467773438, |
|
"eval_loss": 0.22132699191570282, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.25786837935447693, |
|
"eval_rewards/margins": 6.314352512359619, |
|
"eval_rewards/rejected": -6.056484222412109, |
|
"eval_runtime": 0.6559, |
|
"eval_samples_per_second": 7.623, |
|
"eval_steps_per_second": 4.574, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7516543888125858, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 8.816488699485593e-07, |
|
"logits/chosen": -2.176842212677002, |
|
"logits/rejected": -2.3571324348449707, |
|
"logps/chosen": -0.4218795895576477, |
|
"logps/rejected": -1318.904052734375, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21097974479198456, |
|
"rewards/margins": 12.884817123413086, |
|
"rewards/rejected": -12.67383861541748, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7541515794730927, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 8.650957995190784e-07, |
|
"logits/chosen": -2.1513025760650635, |
|
"logits/rejected": -2.3777430057525635, |
|
"logps/chosen": -1.3863859176635742, |
|
"logps/rejected": -1556.2415771484375, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21383562684059143, |
|
"rewards/margins": 15.212430000305176, |
|
"rewards/rejected": -14.9985933303833, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7566487701335997, |
|
"grad_norm": 0.029541015625, |
|
"learning_rate": 8.486670261275193e-07, |
|
"logits/chosen": -2.252506732940674, |
|
"logits/rejected": -2.452782392501831, |
|
"logps/chosen": -0.9220790863037109, |
|
"logps/rejected": -1333.130615234375, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20681767165660858, |
|
"rewards/margins": 13.058314323425293, |
|
"rewards/rejected": -12.851496696472168, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7591459607941067, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 8.32363798806011e-07, |
|
"logits/chosen": -2.2259833812713623, |
|
"logits/rejected": -2.4163031578063965, |
|
"logps/chosen": -0.6000443696975708, |
|
"logps/rejected": -1395.551513671875, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21475176513195038, |
|
"rewards/margins": 13.636807441711426, |
|
"rewards/rejected": -13.42205810546875, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7616431514546136, |
|
"grad_norm": 0.02490234375, |
|
"learning_rate": 8.161873570417742e-07, |
|
"logits/chosen": -2.1769793033599854, |
|
"logits/rejected": -2.3828330039978027, |
|
"logps/chosen": -0.49710139632225037, |
|
"logps/rejected": -1448.161865234375, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2149210423231125, |
|
"rewards/margins": 14.16187858581543, |
|
"rewards/rejected": -13.946958541870117, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7641403421151205, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 8.001389306828897e-07, |
|
"logits/chosen": -2.094914674758911, |
|
"logits/rejected": -2.325759172439575, |
|
"logps/chosen": -1.7604669332504272, |
|
"logps/rejected": -1601.197998046875, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2165949046611786, |
|
"rewards/margins": 15.435786247253418, |
|
"rewards/rejected": -15.219189643859863, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7666375327756274, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 7.842197398447993e-07, |
|
"logits/chosen": -2.119885206222534, |
|
"logits/rejected": -2.3199260234832764, |
|
"logps/chosen": -1.7511274814605713, |
|
"logps/rejected": -1411.588623046875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2108723670244217, |
|
"rewards/margins": 13.716115951538086, |
|
"rewards/rejected": -13.505243301391602, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7691347234361343, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 7.684309948175414e-07, |
|
"logits/chosen": -2.0922672748565674, |
|
"logits/rejected": -2.2642672061920166, |
|
"logps/chosen": -0.6221259832382202, |
|
"logps/rejected": -1387.583251953125, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2143571376800537, |
|
"rewards/margins": 13.555415153503418, |
|
"rewards/rejected": -13.341056823730469, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7716319140966412, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 7.527738959737371e-07, |
|
"logits/chosen": -2.1526269912719727, |
|
"logits/rejected": -2.3576555252075195, |
|
"logps/chosen": -1.0096280574798584, |
|
"logps/rejected": -1377.4000244140625, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2141624391078949, |
|
"rewards/margins": 13.408686637878418, |
|
"rewards/rejected": -13.194523811340332, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7741291047571482, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 7.372496336773269e-07, |
|
"logits/chosen": -2.1142802238464355, |
|
"logits/rejected": -2.297616958618164, |
|
"logps/chosen": -0.8569754362106323, |
|
"logps/rejected": -1148.1827392578125, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21184638142585754, |
|
"rewards/margins": 11.140499114990234, |
|
"rewards/rejected": -10.9286527633667, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7766262954176552, |
|
"grad_norm": 0.0341796875, |
|
"learning_rate": 7.218593881930744e-07, |
|
"logits/chosen": -2.2074034214019775, |
|
"logits/rejected": -2.391183853149414, |
|
"logps/chosen": -0.8030778765678406, |
|
"logps/rejected": -1229.139404296875, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2128894329071045, |
|
"rewards/margins": 12.000519752502441, |
|
"rewards/rejected": -11.787630081176758, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7791234860781621, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 7.066043295968342e-07, |
|
"logits/chosen": -2.1711983680725098, |
|
"logits/rejected": -2.370105266571045, |
|
"logps/chosen": -1.8718315362930298, |
|
"logps/rejected": -1323.237060546875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2125745713710785, |
|
"rewards/margins": 12.693571090698242, |
|
"rewards/rejected": -12.480997085571289, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.781620676738669, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 6.914856176865891e-07, |
|
"logits/chosen": -2.255979537963867, |
|
"logits/rejected": -2.4583592414855957, |
|
"logps/chosen": -1.3440260887145996, |
|
"logps/rejected": -1252.6439208984375, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.205407053232193, |
|
"rewards/margins": 12.165246963500977, |
|
"rewards/rejected": -11.959839820861816, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7841178673991759, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 6.765044018942804e-07, |
|
"logits/chosen": -2.2532248497009277, |
|
"logits/rejected": -2.4564757347106934, |
|
"logps/chosen": -0.6921563744544983, |
|
"logps/rejected": -1213.1630859375, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20914144814014435, |
|
"rewards/margins": 11.834319114685059, |
|
"rewards/rejected": -11.625177383422852, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7866150580596829, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 6.616618211984169e-07, |
|
"logits/chosen": -2.1614041328430176, |
|
"logits/rejected": -2.3516342639923096, |
|
"logps/chosen": -0.3677124083042145, |
|
"logps/rejected": -1380.3824462890625, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2100847065448761, |
|
"rewards/margins": 13.517425537109375, |
|
"rewards/rejected": -13.307340621948242, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7891122487201898, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 6.469590040374799e-07, |
|
"logits/chosen": -2.108102560043335, |
|
"logits/rejected": -2.3011136054992676, |
|
"logps/chosen": -0.5627329349517822, |
|
"logps/rejected": -1450.8382568359375, |
|
"loss": 0.2246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21635802090168, |
|
"rewards/margins": 14.105690002441406, |
|
"rewards/rejected": -13.88933277130127, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.7916094393806967, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 6.32397068224136e-07, |
|
"logits/chosen": -2.2220847606658936, |
|
"logits/rejected": -2.4407782554626465, |
|
"logps/chosen": -0.8599641919136047, |
|
"logps/rejected": -1339.21826171875, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21565623581409454, |
|
"rewards/margins": 13.00416088104248, |
|
"rewards/rejected": -12.788503646850586, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7941066300412036, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 6.17977120860249e-07, |
|
"logits/chosen": -2.208421230316162, |
|
"logits/rejected": -2.4098763465881348, |
|
"logps/chosen": -1.8245439529418945, |
|
"logps/rejected": -1350.473876953125, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21073952317237854, |
|
"rewards/margins": 13.214599609375, |
|
"rewards/rejected": -13.003860473632812, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7966038207017105, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 6.037002582527121e-07, |
|
"logits/chosen": -2.1419100761413574, |
|
"logits/rejected": -2.3298497200012207, |
|
"logps/chosen": -0.7109914422035217, |
|
"logps/rejected": -1332.651123046875, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21053273975849152, |
|
"rewards/margins": 12.884849548339844, |
|
"rewards/rejected": -12.67431640625, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7991010113622175, |
|
"grad_norm": 0.0263671875, |
|
"learning_rate": 5.895675658300981e-07, |
|
"logits/chosen": -2.310133934020996, |
|
"logits/rejected": -2.4916586875915527, |
|
"logps/chosen": -0.809489905834198, |
|
"logps/rejected": -1163.7008056640625, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21247372031211853, |
|
"rewards/margins": 11.380758285522461, |
|
"rewards/rejected": -11.168285369873047, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8015982020227245, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 5.755801180601381e-07, |
|
"logits/chosen": -2.2009828090667725, |
|
"logits/rejected": -2.4239349365234375, |
|
"logps/chosen": -0.8167581558227539, |
|
"logps/rejected": -1351.5517578125, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21433226764202118, |
|
"rewards/margins": 13.204305648803711, |
|
"rewards/rejected": -12.989973068237305, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8040953926832314, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 5.617389783680307e-07, |
|
"logits/chosen": -2.080115795135498, |
|
"logits/rejected": -2.3226914405822754, |
|
"logps/chosen": -0.7443311810493469, |
|
"logps/rejected": -1530.034423828125, |
|
"loss": 0.224, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21728749573230743, |
|
"rewards/margins": 14.894986152648926, |
|
"rewards/rejected": -14.677696228027344, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8065925833437383, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 5.48045199055596e-07, |
|
"logits/chosen": -2.1640877723693848, |
|
"logits/rejected": -2.36962628364563, |
|
"logps/chosen": -1.1329087018966675, |
|
"logps/rejected": -1273.104736328125, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20660333335399628, |
|
"rewards/margins": 12.42498779296875, |
|
"rewards/rejected": -12.218384742736816, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8090897740042452, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 5.344998212212704e-07, |
|
"logits/chosen": -2.091491937637329, |
|
"logits/rejected": -2.327758312225342, |
|
"logps/chosen": -1.4844013452529907, |
|
"logps/rejected": -1502.2427978515625, |
|
"loss": 0.224, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2210853397846222, |
|
"rewards/margins": 14.54228687286377, |
|
"rewards/rejected": -14.321202278137207, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8115869646647521, |
|
"grad_norm": 0.03564453125, |
|
"learning_rate": 5.211038746809551e-07, |
|
"logits/chosen": -2.192322015762329, |
|
"logits/rejected": -2.3808138370513916, |
|
"logps/chosen": -0.5706063508987427, |
|
"logps/rejected": -1285.364013671875, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20710210502147675, |
|
"rewards/margins": 12.564165115356445, |
|
"rewards/rejected": -12.357062339782715, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8140841553252591, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 5.078583778897216e-07, |
|
"logits/chosen": -2.1883485317230225, |
|
"logits/rejected": -2.3633830547332764, |
|
"logps/chosen": -1.4209082126617432, |
|
"logps/rejected": -1214.212890625, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21422162652015686, |
|
"rewards/margins": 11.81783390045166, |
|
"rewards/rejected": -11.603612899780273, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.816581345985766, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 4.94764337864384e-07, |
|
"logits/chosen": -2.2724106311798096, |
|
"logits/rejected": -2.4622254371643066, |
|
"logps/chosen": -0.9480986595153809, |
|
"logps/rejected": -1307.112548828125, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2102733850479126, |
|
"rewards/margins": 12.694864273071289, |
|
"rewards/rejected": -12.484590530395508, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.819078536646273, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 4.818227501069328e-07, |
|
"logits/chosen": -2.2342686653137207, |
|
"logits/rejected": -2.4815187454223633, |
|
"logps/chosen": -1.358782410621643, |
|
"logps/rejected": -1722.28125, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21825866401195526, |
|
"rewards/margins": 16.87509536743164, |
|
"rewards/rejected": -16.656835556030273, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.8215757273067799, |
|
"grad_norm": 0.031982421875, |
|
"learning_rate": 4.690345985288572e-07, |
|
"logits/chosen": -2.1274971961975098, |
|
"logits/rejected": -2.328562021255493, |
|
"logps/chosen": -1.1761976480484009, |
|
"logps/rejected": -1403.730224609375, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.213038831949234, |
|
"rewards/margins": 13.633008003234863, |
|
"rewards/rejected": -13.41996955871582, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.8240729179672868, |
|
"grad_norm": 0.01953125, |
|
"learning_rate": 4.5640085537633633e-07, |
|
"logits/chosen": -2.1780600547790527, |
|
"logits/rejected": -2.418370008468628, |
|
"logps/chosen": -1.041684865951538, |
|
"logps/rejected": -1449.9898681640625, |
|
"loss": 0.2235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21910138428211212, |
|
"rewards/margins": 14.122426986694336, |
|
"rewards/rejected": -13.903326034545898, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8265701086277938, |
|
"grad_norm": 0.04052734375, |
|
"learning_rate": 4.439224811563211e-07, |
|
"logits/chosen": -2.0584537982940674, |
|
"logits/rejected": -2.258396625518799, |
|
"logps/chosen": -0.6486183404922485, |
|
"logps/rejected": -1476.123291015625, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21381358802318573, |
|
"rewards/margins": 14.360862731933594, |
|
"rewards/rejected": -14.147050857543945, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8290672992883007, |
|
"grad_norm": 0.04345703125, |
|
"learning_rate": 4.316004245635158e-07, |
|
"logits/chosen": -2.147899866104126, |
|
"logits/rejected": -2.3480546474456787, |
|
"logps/chosen": -1.0383152961730957, |
|
"logps/rejected": -1587.1812744140625, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2098073661327362, |
|
"rewards/margins": 15.521720886230469, |
|
"rewards/rejected": -15.311912536621094, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8315644899488076, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 4.194356224082455e-07, |
|
"logits/chosen": -2.0754525661468506, |
|
"logits/rejected": -2.304088592529297, |
|
"logps/chosen": -0.6566920876502991, |
|
"logps/rejected": -1547.8634033203125, |
|
"loss": 0.227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20915034413337708, |
|
"rewards/margins": 15.037984848022461, |
|
"rewards/rejected": -14.828834533691406, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8340616806093145, |
|
"grad_norm": 0.0283203125, |
|
"learning_rate": 4.074289995452338e-07, |
|
"logits/chosen": -2.141746997833252, |
|
"logits/rejected": -2.3306097984313965, |
|
"logps/chosen": -0.9173293113708496, |
|
"logps/rejected": -1333.7867431640625, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2176503688097, |
|
"rewards/margins": 13.049924850463867, |
|
"rewards/rejected": -12.832275390625, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8365588712698214, |
|
"grad_norm": 0.01904296875, |
|
"learning_rate": 3.9558146880329246e-07, |
|
"logits/chosen": -2.1531293392181396, |
|
"logits/rejected": -2.3555006980895996, |
|
"logps/chosen": -1.041372537612915, |
|
"logps/rejected": -1363.6673583984375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21647608280181885, |
|
"rewards/margins": 13.151887893676758, |
|
"rewards/rejected": -12.935412406921387, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8390560619303283, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 3.838939309159187e-07, |
|
"logits/chosen": -2.150744915008545, |
|
"logits/rejected": -2.3291797637939453, |
|
"logps/chosen": -0.6859675645828247, |
|
"logps/rejected": -1347.5732421875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21168136596679688, |
|
"rewards/margins": 13.16771411895752, |
|
"rewards/rejected": -12.956031799316406, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8415532525908354, |
|
"grad_norm": 0.0213623046875, |
|
"learning_rate": 3.723672744528162e-07, |
|
"logits/chosen": -2.225355863571167, |
|
"logits/rejected": -2.434971570968628, |
|
"logps/chosen": -0.7719963788986206, |
|
"logps/rejected": -1404.882568359375, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2120278775691986, |
|
"rewards/margins": 13.696516036987305, |
|
"rewards/rejected": -13.484487533569336, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8440504432513423, |
|
"grad_norm": 0.0220947265625, |
|
"learning_rate": 3.6100237575233647e-07, |
|
"logits/chosen": -2.2835781574249268, |
|
"logits/rejected": -2.459686279296875, |
|
"logps/chosen": -0.8155478239059448, |
|
"logps/rejected": -1183.509033203125, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21422457695007324, |
|
"rewards/margins": 11.59019660949707, |
|
"rewards/rejected": -11.375970840454102, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8465476339118492, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 3.4980009885486054e-07, |
|
"logits/chosen": -2.2139523029327393, |
|
"logits/rejected": -2.3762905597686768, |
|
"logps/chosen": -0.49865931272506714, |
|
"logps/rejected": -1125.1737060546875, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20752505958080292, |
|
"rewards/margins": 10.996343612670898, |
|
"rewards/rejected": -10.788819313049316, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8490448245723561, |
|
"grad_norm": 0.01556396484375, |
|
"learning_rate": 3.3876129543710197e-07, |
|
"logits/chosen": -2.184354305267334, |
|
"logits/rejected": -2.3724493980407715, |
|
"logps/chosen": -0.690311074256897, |
|
"logps/rejected": -1528.7587890625, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21172885596752167, |
|
"rewards/margins": 14.938085556030273, |
|
"rewards/rejected": -14.726354598999023, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.851542015232863, |
|
"grad_norm": 0.01495361328125, |
|
"learning_rate": 3.2788680474735687e-07, |
|
"logits/chosen": -2.1705164909362793, |
|
"logits/rejected": -2.373166561126709, |
|
"logps/chosen": -0.5612165927886963, |
|
"logps/rejected": -1317.1187744140625, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20889082551002502, |
|
"rewards/margins": 12.858512878417969, |
|
"rewards/rejected": -12.649621963500977, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.85403920589337, |
|
"grad_norm": 0.00872802734375, |
|
"learning_rate": 3.1717745354170214e-07, |
|
"logits/chosen": -2.071406841278076, |
|
"logits/rejected": -2.2939293384552, |
|
"logps/chosen": -0.8238442540168762, |
|
"logps/rejected": -1532.26513671875, |
|
"loss": 0.2244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21248868107795715, |
|
"rewards/margins": 15.00029468536377, |
|
"rewards/rejected": -14.787805557250977, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8565363965538769, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 3.0663405602113727e-07, |
|
"logits/chosen": -2.24153208732605, |
|
"logits/rejected": -2.467984676361084, |
|
"logps/chosen": -0.9753687977790833, |
|
"logps/rejected": -1389.8427734375, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20957298576831818, |
|
"rewards/margins": 13.579081535339355, |
|
"rewards/rejected": -13.369508743286133, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8590335872143838, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 2.9625741376968107e-07, |
|
"logits/chosen": -2.060586452484131, |
|
"logits/rejected": -2.3030850887298584, |
|
"logps/chosen": -2.972282886505127, |
|
"logps/rejected": -1365.322265625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21505007147789001, |
|
"rewards/margins": 12.994850158691406, |
|
"rewards/rejected": -12.77979850769043, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8615307778748907, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 2.8604831569343324e-07, |
|
"logits/chosen": -2.2799830436706543, |
|
"logits/rejected": -2.4574227333068848, |
|
"logps/chosen": -0.9521903991699219, |
|
"logps/rejected": -1208.1971435546875, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21667388081550598, |
|
"rewards/margins": 11.710010528564453, |
|
"rewards/rejected": -11.493337631225586, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8640279685353977, |
|
"grad_norm": 0.028564453125, |
|
"learning_rate": 2.760075379605942e-07, |
|
"logits/chosen": -2.1184418201446533, |
|
"logits/rejected": -2.292738199234009, |
|
"logps/chosen": -0.882199764251709, |
|
"logps/rejected": -1400.0753173828125, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20649878680706024, |
|
"rewards/margins": 13.685522079467773, |
|
"rewards/rejected": -13.479023933410645, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8665251591959046, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 2.661358439424552e-07, |
|
"logits/chosen": -2.1794090270996094, |
|
"logits/rejected": -2.3647027015686035, |
|
"logps/chosen": -0.8141934275627136, |
|
"logps/rejected": -1179.304931640625, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21081428229808807, |
|
"rewards/margins": 11.469918251037598, |
|
"rewards/rejected": -11.25910472869873, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8690223498564116, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 2.564339841553615e-07, |
|
"logits/chosen": -2.1696417331695557, |
|
"logits/rejected": -2.341275453567505, |
|
"logps/chosen": -0.6168124675750732, |
|
"logps/rejected": -1255.4180908203125, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20439627766609192, |
|
"rewards/margins": 12.246126174926758, |
|
"rewards/rejected": -12.041730880737305, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8715195405169185, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 2.469026962036539e-07, |
|
"logits/chosen": -2.155325412750244, |
|
"logits/rejected": -2.346266984939575, |
|
"logps/chosen": -1.7188537120819092, |
|
"logps/rejected": -1198.50634765625, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21203342080116272, |
|
"rewards/margins": 11.423583984375, |
|
"rewards/rejected": -11.211549758911133, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8740167311774254, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 2.3754270472358786e-07, |
|
"logits/chosen": -2.1500706672668457, |
|
"logits/rejected": -2.346287965774536, |
|
"logps/chosen": -1.2322837114334106, |
|
"logps/rejected": -1203.8701171875, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2134041041135788, |
|
"rewards/margins": 11.585506439208984, |
|
"rewards/rejected": -11.372100830078125, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8765139218379323, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 2.283547213282458e-07, |
|
"logits/chosen": -2.26165509223938, |
|
"logits/rejected": -2.4591403007507324, |
|
"logps/chosen": -1.2189921140670776, |
|
"logps/rejected": -1291.148193359375, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21497786045074463, |
|
"rewards/margins": 12.45503044128418, |
|
"rewards/rejected": -12.24005126953125, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8790111124984392, |
|
"grad_norm": 0.0400390625, |
|
"learning_rate": 2.1933944455343166e-07, |
|
"logits/chosen": -1.9996531009674072, |
|
"logits/rejected": -2.232881784439087, |
|
"logps/chosen": -1.0685181617736816, |
|
"logps/rejected": -1328.8775634765625, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21257129311561584, |
|
"rewards/margins": 12.904253959655762, |
|
"rewards/rejected": -12.691683769226074, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8815083031589462, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 2.104975598045647e-07, |
|
"logits/chosen": -2.1279807090759277, |
|
"logits/rejected": -2.3155367374420166, |
|
"logps/chosen": -0.7418814897537231, |
|
"logps/rejected": -1234.801025390625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21231353282928467, |
|
"rewards/margins": 12.064626693725586, |
|
"rewards/rejected": -11.852312088012695, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8840054938194531, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 2.018297393045701e-07, |
|
"logits/chosen": -2.169581651687622, |
|
"logits/rejected": -2.334414005279541, |
|
"logps/chosen": -1.1093792915344238, |
|
"logps/rejected": -1281.266357421875, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.209160715341568, |
|
"rewards/margins": 12.48926067352295, |
|
"rewards/rejected": -12.280099868774414, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8865026844799601, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 1.9333664204277236e-07, |
|
"logits/chosen": -2.0912182331085205, |
|
"logits/rejected": -2.292468309402466, |
|
"logps/chosen": -0.8641373515129089, |
|
"logps/rejected": -1473.767333984375, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21468326449394226, |
|
"rewards/margins": 14.221132278442383, |
|
"rewards/rejected": -14.006446838378906, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.888999875140467, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 1.8501891372479124e-07, |
|
"logits/chosen": -2.155086040496826, |
|
"logits/rejected": -2.3607256412506104, |
|
"logps/chosen": -1.0332701206207275, |
|
"logps/rejected": -1407.046875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21279795467853546, |
|
"rewards/margins": 13.675623893737793, |
|
"rewards/rejected": -13.4628267288208, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8914970658009739, |
|
"grad_norm": 0.10498046875, |
|
"learning_rate": 1.7687718672345533e-07, |
|
"logits/chosen": -2.1115050315856934, |
|
"logits/rejected": -2.295365810394287, |
|
"logps/chosen": -1.0766206979751587, |
|
"logps/rejected": -1537.2154541015625, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21170708537101746, |
|
"rewards/margins": 15.0249662399292, |
|
"rewards/rejected": -14.813258171081543, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8939942564614808, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 1.689120800307212e-07, |
|
"logits/chosen": -2.010655403137207, |
|
"logits/rejected": -2.2343146800994873, |
|
"logps/chosen": -0.6959076523780823, |
|
"logps/rejected": -1583.5645751953125, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2187151461839676, |
|
"rewards/margins": 15.251518249511719, |
|
"rewards/rejected": -15.032801628112793, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8964914471219878, |
|
"grad_norm": 0.041259765625, |
|
"learning_rate": 1.6112419921061357e-07, |
|
"logits/chosen": -2.149298906326294, |
|
"logits/rejected": -2.3335325717926025, |
|
"logps/chosen": -1.0091092586517334, |
|
"logps/rejected": -1295.9100341796875, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2126895934343338, |
|
"rewards/margins": 12.669659614562988, |
|
"rewards/rejected": -12.456971168518066, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8989886377824947, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 1.5351413635318807e-07, |
|
"logits/chosen": -2.2476723194122314, |
|
"logits/rejected": -2.4481379985809326, |
|
"logps/chosen": -1.025138258934021, |
|
"logps/rejected": -1300.3070068359375, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21075649559497833, |
|
"rewards/margins": 12.60840892791748, |
|
"rewards/rejected": -12.397652626037598, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9014858284430016, |
|
"grad_norm": 0.0279541015625, |
|
"learning_rate": 1.460824700295138e-07, |
|
"logits/chosen": -2.246796131134033, |
|
"logits/rejected": -2.438882350921631, |
|
"logps/chosen": -1.5276464223861694, |
|
"logps/rejected": -1376.2548828125, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21834063529968262, |
|
"rewards/margins": 13.44012451171875, |
|
"rewards/rejected": -13.221783638000488, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9039830191035085, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 1.3882976524768694e-07, |
|
"logits/chosen": -2.2246479988098145, |
|
"logits/rejected": -2.397996425628662, |
|
"logps/chosen": -1.2670552730560303, |
|
"logps/rejected": -1179.010986328125, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21077945828437805, |
|
"rewards/margins": 11.500974655151367, |
|
"rewards/rejected": -11.290196418762207, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9064802097640154, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 1.3175657340987664e-07, |
|
"logits/chosen": -2.1487388610839844, |
|
"logits/rejected": -2.334177255630493, |
|
"logps/chosen": -0.5317996740341187, |
|
"logps/rejected": -1380.195556640625, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2111264169216156, |
|
"rewards/margins": 13.50303840637207, |
|
"rewards/rejected": -13.291911125183105, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9089774004245225, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 1.2486343227040122e-07, |
|
"logits/chosen": -2.2575807571411133, |
|
"logits/rejected": -2.471717357635498, |
|
"logps/chosen": -1.4988012313842773, |
|
"logps/rejected": -1318.032470703125, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22440342605113983, |
|
"rewards/margins": 12.804231643676758, |
|
"rewards/rejected": -12.579828262329102, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9114745910850294, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 1.181508658948452e-07, |
|
"logits/chosen": -2.189079999923706, |
|
"logits/rejected": -2.372708559036255, |
|
"logps/chosen": -0.8293665051460266, |
|
"logps/rejected": -1286.6043701171875, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21110188961029053, |
|
"rewards/margins": 12.531554222106934, |
|
"rewards/rejected": -12.320451736450195, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9139717817455363, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 1.1161938462021627e-07, |
|
"logits/chosen": -2.082040309906006, |
|
"logits/rejected": -2.2717125415802, |
|
"logps/chosen": -1.0598349571228027, |
|
"logps/rejected": -1245.3990478515625, |
|
"loss": 0.228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21748106181621552, |
|
"rewards/margins": 12.090206146240234, |
|
"rewards/rejected": -11.872724533081055, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9164689724060432, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 1.0526948501614536e-07, |
|
"logits/chosen": -2.103464126586914, |
|
"logits/rejected": -2.3152968883514404, |
|
"logps/chosen": -1.075402021408081, |
|
"logps/rejected": -1461.2308349609375, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21564142405986786, |
|
"rewards/margins": 14.216550827026367, |
|
"rewards/rejected": -14.000910758972168, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9189661630665501, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 9.910164984713477e-08, |
|
"logits/chosen": -2.1121301651000977, |
|
"logits/rejected": -2.327693223953247, |
|
"logps/chosen": -1.3442718982696533, |
|
"logps/rejected": -1471.4466552734375, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21234098076820374, |
|
"rewards/margins": 14.340484619140625, |
|
"rewards/rejected": -14.128143310546875, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.921463353727057, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 9.311634803585323e-08, |
|
"logits/chosen": -2.1561217308044434, |
|
"logits/rejected": -2.3662197589874268, |
|
"logps/chosen": -0.8007721900939941, |
|
"logps/rejected": -1469.8878173828125, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2085859775543213, |
|
"rewards/margins": 14.383298873901367, |
|
"rewards/rejected": -14.174713134765625, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.923960544387564, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 8.7314034627487e-08, |
|
"logits/chosen": -2.203339099884033, |
|
"logits/rejected": -2.40441632270813, |
|
"logps/chosen": -0.5535265207290649, |
|
"logps/rejected": -1442.5123291015625, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20969831943511963, |
|
"rewards/margins": 14.11566162109375, |
|
"rewards/rejected": -13.905962944030762, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9264577350480709, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 8.16951507551439e-08, |
|
"logits/chosen": -2.2100465297698975, |
|
"logits/rejected": -2.394742250442505, |
|
"logps/chosen": -1.0725539922714233, |
|
"logps/rejected": -1294.114990234375, |
|
"loss": 0.2255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21020770072937012, |
|
"rewards/margins": 12.512723922729492, |
|
"rewards/rejected": -12.302515029907227, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9289549257085778, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 7.626012360631291e-08, |
|
"logits/chosen": -2.2372231483459473, |
|
"logits/rejected": -2.4310178756713867, |
|
"logps/chosen": -1.1079142093658447, |
|
"logps/rejected": -1298.0045166015625, |
|
"loss": 0.2261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21124926209449768, |
|
"rewards/margins": 12.63310718536377, |
|
"rewards/rejected": -12.421857833862305, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9314521163690848, |
|
"grad_norm": 0.03466796875, |
|
"learning_rate": 7.100936639038936e-08, |
|
"logits/chosen": -2.0344414710998535, |
|
"logits/rejected": -2.2667644023895264, |
|
"logps/chosen": -1.0317838191986084, |
|
"logps/rejected": -1655.9033203125, |
|
"loss": 0.224, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2157924920320511, |
|
"rewards/margins": 16.17062759399414, |
|
"rewards/rejected": -15.954833984375, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9339493070295917, |
|
"grad_norm": 0.00531005859375, |
|
"learning_rate": 6.594327830725916e-08, |
|
"logits/chosen": -2.162308931350708, |
|
"logits/rejected": -2.371338367462158, |
|
"logps/chosen": -0.7821828722953796, |
|
"logps/rejected": -1442.607177734375, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21128495037555695, |
|
"rewards/margins": 14.142976760864258, |
|
"rewards/rejected": -13.931692123413086, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9364464976900987, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 6.106224451694592e-08, |
|
"logits/chosen": -2.1930558681488037, |
|
"logits/rejected": -2.386634111404419, |
|
"logps/chosen": -0.6907114386558533, |
|
"logps/rejected": -1420.8463134765625, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21480241417884827, |
|
"rewards/margins": 13.8462495803833, |
|
"rewards/rejected": -13.631448745727539, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9389436883506056, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 5.636663611033266e-08, |
|
"logits/chosen": -2.058790683746338, |
|
"logits/rejected": -2.274880886077881, |
|
"logps/chosen": -0.41397613286972046, |
|
"logps/rejected": -1431.2852783203125, |
|
"loss": 0.2266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20900818705558777, |
|
"rewards/margins": 14.00958251953125, |
|
"rewards/rejected": -13.800572395324707, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9414408790111125, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 5.185681008094579e-08, |
|
"logits/chosen": -2.251438617706299, |
|
"logits/rejected": -2.4458415508270264, |
|
"logps/chosen": -1.0221302509307861, |
|
"logps/rejected": -1385.7362060546875, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21662044525146484, |
|
"rewards/margins": 13.469772338867188, |
|
"rewards/rejected": -13.253152847290039, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9439380696716194, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 4.753310929781513e-08, |
|
"logits/chosen": -2.206300973892212, |
|
"logits/rejected": -2.3716189861297607, |
|
"logps/chosen": -0.6498397588729858, |
|
"logps/rejected": -1291.197021484375, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21270425617694855, |
|
"rewards/margins": 12.625164985656738, |
|
"rewards/rejected": -12.412460327148438, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9464352603321263, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 4.3395862479405914e-08, |
|
"logits/chosen": -2.1362087726593018, |
|
"logits/rejected": -2.332123041152954, |
|
"logps/chosen": -1.0763086080551147, |
|
"logps/rejected": -1387.7713623046875, |
|
"loss": 0.2244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2146589756011963, |
|
"rewards/margins": 13.39326286315918, |
|
"rewards/rejected": -13.178604125976562, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9489324509926332, |
|
"grad_norm": 0.0155029296875, |
|
"learning_rate": 3.9445384168628474e-08, |
|
"logits/chosen": -2.291581869125366, |
|
"logits/rejected": -2.500275135040283, |
|
"logps/chosen": -1.0031490325927734, |
|
"logps/rejected": -1276.675537109375, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20909054577350616, |
|
"rewards/margins": 12.3539457321167, |
|
"rewards/rejected": -12.144854545593262, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9514296416531403, |
|
"grad_norm": 0.0284423828125, |
|
"learning_rate": 3.5681974708923484e-08, |
|
"logits/chosen": -2.1034350395202637, |
|
"logits/rejected": -2.2940239906311035, |
|
"logps/chosen": -0.8783596158027649, |
|
"logps/rejected": -1220.660888671875, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21768565475940704, |
|
"rewards/margins": 11.806703567504883, |
|
"rewards/rejected": -11.589017868041992, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9539268323136472, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 3.210592022142717e-08, |
|
"logits/chosen": -2.1330649852752686, |
|
"logits/rejected": -2.2985074520111084, |
|
"logps/chosen": -0.7123221158981323, |
|
"logps/rejected": -1336.329833984375, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20608916878700256, |
|
"rewards/margins": 12.982263565063477, |
|
"rewards/rejected": -12.776172637939453, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9564240229741541, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 2.8717492583220095e-08, |
|
"logits/chosen": -2.225675106048584, |
|
"logits/rejected": -2.428712844848633, |
|
"logps/chosen": -0.8774779438972473, |
|
"logps/rejected": -1398.2039794921875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20993375778198242, |
|
"rewards/margins": 13.676666259765625, |
|
"rewards/rejected": -13.4667329788208, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.958921213634661, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 2.551694940665539e-08, |
|
"logits/chosen": -2.163163423538208, |
|
"logits/rejected": -2.351386070251465, |
|
"logps/chosen": -0.9975617527961731, |
|
"logps/rejected": -1255.6383056640625, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21191437542438507, |
|
"rewards/margins": 12.254827499389648, |
|
"rewards/rejected": -12.042913436889648, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9614184042951679, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 2.2504534019774092e-08, |
|
"logits/chosen": -2.3171262741088867, |
|
"logits/rejected": -2.492202043533325, |
|
"logps/chosen": -0.872540295124054, |
|
"logps/rejected": -1181.1051025390625, |
|
"loss": 0.2264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21220049262046814, |
|
"rewards/margins": 11.461995124816895, |
|
"rewards/rejected": -11.24979305267334, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9639155949556749, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 1.9680475447805826e-08, |
|
"logits/chosen": -2.1993744373321533, |
|
"logits/rejected": -2.380159378051758, |
|
"logps/chosen": -0.721504807472229, |
|
"logps/rejected": -1297.561767578125, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20789849758148193, |
|
"rewards/margins": 12.683464050292969, |
|
"rewards/rejected": -12.475565910339355, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9664127856161818, |
|
"grad_norm": 0.0201416015625, |
|
"learning_rate": 1.70449883957563e-08, |
|
"logits/chosen": -2.232905626296997, |
|
"logits/rejected": -2.4287447929382324, |
|
"logps/chosen": -2.1762092113494873, |
|
"logps/rejected": -1314.664794921875, |
|
"loss": 0.2253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21345773339271545, |
|
"rewards/margins": 12.700533866882324, |
|
"rewards/rejected": -12.487077713012695, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9689099762766887, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 1.4598273232083182e-08, |
|
"logits/chosen": -2.198019027709961, |
|
"logits/rejected": -2.3671329021453857, |
|
"logps/chosen": -0.9621660113334656, |
|
"logps/rejected": -1280.2039794921875, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20697855949401855, |
|
"rewards/margins": 12.497517585754395, |
|
"rewards/rejected": -12.29054069519043, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9714071669371956, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 1.2340515973464917e-08, |
|
"logits/chosen": -2.1526544094085693, |
|
"logits/rejected": -2.3664348125457764, |
|
"logps/chosen": -1.546007752418518, |
|
"logps/rejected": -1401.54638671875, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20804882049560547, |
|
"rewards/margins": 13.68072509765625, |
|
"rewards/rejected": -13.472674369812012, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9739043575977026, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.0271888270655118e-08, |
|
"logits/chosen": -2.043034076690674, |
|
"logits/rejected": -2.229666233062744, |
|
"logps/chosen": -0.9901046752929688, |
|
"logps/rejected": -1281.4815673828125, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2106485813856125, |
|
"rewards/margins": 12.358712196350098, |
|
"rewards/rejected": -12.148063659667969, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9764015482582095, |
|
"grad_norm": 0.07666015625, |
|
"learning_rate": 8.392547395435769e-09, |
|
"logits/chosen": -2.374267101287842, |
|
"logits/rejected": -2.551339626312256, |
|
"logps/chosen": -1.2009716033935547, |
|
"logps/rejected": -1176.1605224609375, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20642951130867004, |
|
"rewards/margins": 11.431352615356445, |
|
"rewards/rejected": -11.224924087524414, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9788987389187165, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 6.702636228657911e-09, |
|
"logits/chosen": -2.262585163116455, |
|
"logits/rejected": -2.4511070251464844, |
|
"logps/chosen": -0.7528651356697083, |
|
"logps/rejected": -1265.910400390625, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21303972601890564, |
|
"rewards/margins": 12.352733612060547, |
|
"rewards/rejected": -12.139693260192871, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9813959295792234, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 5.2022832493800465e-09, |
|
"logits/chosen": -2.3309874534606934, |
|
"logits/rejected": -2.5094618797302246, |
|
"logps/chosen": -0.8482611775398254, |
|
"logps/rejected": -1145.723876953125, |
|
"loss": 0.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2170940339565277, |
|
"rewards/margins": 11.18010139465332, |
|
"rewards/rejected": -10.963006973266602, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9838931202397303, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 3.891602525100124e-09, |
|
"logits/chosen": -2.202822208404541, |
|
"logits/rejected": -2.4167404174804688, |
|
"logps/chosen": -0.8022462725639343, |
|
"logps/rejected": -1359.097412109375, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21217508614063263, |
|
"rewards/margins": 13.16607666015625, |
|
"rewards/rejected": -12.953901290893555, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9863903109002372, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 2.7706937030827495e-09, |
|
"logits/chosen": -2.245856285095215, |
|
"logits/rejected": -2.436892032623291, |
|
"logps/chosen": -1.236242651939392, |
|
"logps/rejected": -1134.9066162109375, |
|
"loss": 0.226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20883974432945251, |
|
"rewards/margins": 10.997222900390625, |
|
"rewards/rejected": -10.788381576538086, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9888875015607441, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 1.839642002783859e-09, |
|
"logits/chosen": -2.1721549034118652, |
|
"logits/rejected": -2.3608601093292236, |
|
"logps/chosen": -0.9914839863777161, |
|
"logps/rejected": -1147.5926513671875, |
|
"loss": 0.2273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21253648400306702, |
|
"rewards/margins": 11.109753608703613, |
|
"rewards/rejected": -10.897214889526367, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9913846922212511, |
|
"grad_norm": 0.0308837890625, |
|
"learning_rate": 1.0985182093714574e-09, |
|
"logits/chosen": -2.2215476036071777, |
|
"logits/rejected": -2.3835880756378174, |
|
"logps/chosen": -0.42377692461013794, |
|
"logps/rejected": -1237.712646484375, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20775683224201202, |
|
"rewards/margins": 12.072611808776855, |
|
"rewards/rejected": -11.86485481262207, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.993881882881758, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 5.473786683440896e-10, |
|
"logits/chosen": -2.119377613067627, |
|
"logits/rejected": -2.3185195922851562, |
|
"logps/chosen": -1.0564239025115967, |
|
"logps/rejected": -1471.339111328125, |
|
"loss": 0.2259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21374066174030304, |
|
"rewards/margins": 14.393136978149414, |
|
"rewards/rejected": -14.17939567565918, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.996379073542265, |
|
"grad_norm": 0.0311279296875, |
|
"learning_rate": 1.862652812467669e-10, |
|
"logits/chosen": -2.1754400730133057, |
|
"logits/rejected": -2.3970232009887695, |
|
"logps/chosen": -1.259765863418579, |
|
"logps/rejected": -1448.65576171875, |
|
"loss": 0.2242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2176629602909088, |
|
"rewards/margins": 13.863238334655762, |
|
"rewards/rejected": -13.645576477050781, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9988762642027719, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 1.5205502486292932e-11, |
|
"logits/chosen": -2.143209934234619, |
|
"logits/rejected": -2.34411883354187, |
|
"logps/chosen": -0.6734473705291748, |
|
"logps/rejected": -1441.0018310546875, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20581206679344177, |
|
"rewards/margins": 14.11164379119873, |
|
"rewards/rejected": -13.905832290649414, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9988762642027719, |
|
"eval_logits/chosen": -2.571059465408325, |
|
"eval_logits/rejected": -2.6589972972869873, |
|
"eval_logps/chosen": -0.11967950314283371, |
|
"eval_logps/rejected": -652.1184692382812, |
|
"eval_loss": 0.22132086753845215, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.2579382359981537, |
|
"eval_rewards/margins": 6.330402374267578, |
|
"eval_rewards/rejected": -6.072464466094971, |
|
"eval_runtime": 0.656, |
|
"eval_samples_per_second": 7.622, |
|
"eval_steps_per_second": 4.573, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9998751404669747, |
|
"step": 4004, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2426841035559699, |
|
"train_runtime": 8271.4989, |
|
"train_samples_per_second": 1.936, |
|
"train_steps_per_second": 0.484 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4004, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|