{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.06428783416660959, "eval_steps": 500, "global_step": 44, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001461087140150218, "grad_norm": 2.4780876636505127, "learning_rate": 1.4285714285714286e-06, "log_odds_chosen": 0.016244888305664062, "log_odds_ratio": -0.6993749141693115, "logits/chosen": -2.2119967937469482, "logits/rejected": -2.212354898452759, "logps/chosen": -1.583775520324707, "logps/rejected": -1.594750165939331, "loss": 1.8606, "nll_loss": 1.7906217575073242, "rewards/accuracies": 0.515625, "rewards/chosen": -0.1583775281906128, "rewards/margins": 0.0010974762262776494, "rewards/rejected": -0.15947501361370087, "step": 1 }, { "epoch": 0.002922174280300436, "grad_norm": 2.627044200897217, "learning_rate": 2.8571428571428573e-06, "log_odds_chosen": 0.11979679018259048, "log_odds_ratio": -0.6573244333267212, "logits/chosen": -2.2274065017700195, "logits/rejected": -2.243682622909546, "logps/chosen": -1.5664572715759277, "logps/rejected": -1.6658927202224731, "loss": 1.8544, "nll_loss": 1.7886956930160522, "rewards/accuracies": 0.546875, "rewards/chosen": -0.156645730137825, "rewards/margins": 0.009943531826138496, "rewards/rejected": -0.16658926010131836, "step": 2 }, { "epoch": 0.004383261420450654, "grad_norm": 2.113027334213257, "learning_rate": 4.2857142857142855e-06, "log_odds_chosen": 0.058733537793159485, "log_odds_ratio": -0.6822808980941772, "logits/chosen": -2.1965179443359375, "logits/rejected": -2.195549964904785, "logps/chosen": -1.4528911113739014, "logps/rejected": -1.498779296875, "loss": 1.7314, "nll_loss": 1.6631801128387451, "rewards/accuracies": 0.5, "rewards/chosen": -0.1452891230583191, "rewards/margins": 0.004588826093822718, "rewards/rejected": -0.14987793564796448, "step": 3 }, { "epoch": 0.005844348560600872, "grad_norm": 1.8004069328308105, "learning_rate": 5.7142857142857145e-06, "log_odds_chosen": 0.09058406949043274, "log_odds_ratio": -0.6622194051742554, "logits/chosen": -2.240548849105835, "logits/rejected": -2.276327133178711, "logps/chosen": -1.469621181488037, "logps/rejected": -1.5354365110397339, "loss": 1.8061, "nll_loss": 1.7399120330810547, "rewards/accuracies": 0.515625, "rewards/chosen": -0.14696213603019714, "rewards/margins": 0.006581515539437532, "rewards/rejected": -0.1535436362028122, "step": 4 }, { "epoch": 0.00730543570075109, "grad_norm": 1.565532922744751, "learning_rate": 7.1428571428571436e-06, "log_odds_chosen": 0.052982207387685776, "log_odds_ratio": -0.6897823214530945, "logits/chosen": -2.2151541709899902, "logits/rejected": -2.215179681777954, "logps/chosen": -1.5283693075180054, "logps/rejected": -1.5738036632537842, "loss": 1.8656, "nll_loss": 1.7966063022613525, "rewards/accuracies": 0.53125, "rewards/chosen": -0.15283691883087158, "rewards/margins": 0.004543437156826258, "rewards/rejected": -0.1573803573846817, "step": 5 }, { "epoch": 0.008766522840901307, "grad_norm": 1.689719557762146, "learning_rate": 8.571428571428571e-06, "log_odds_chosen": -0.021141668781638145, "log_odds_ratio": -0.7178523540496826, "logits/chosen": -2.180445432662964, "logits/rejected": -2.216461658477783, "logps/chosen": -1.423187255859375, "logps/rejected": -1.4068344831466675, "loss": 1.7435, "nll_loss": 1.671682357788086, "rewards/accuracies": 0.4375, "rewards/chosen": -0.1423187255859375, "rewards/margins": -0.0016352771781384945, "rewards/rejected": -0.14068344235420227, "step": 6 }, { "epoch": 0.010227609981051527, "grad_norm": 1.432656168937683, "learning_rate": 1e-05, "log_odds_chosen": -0.045114632695913315, "log_odds_ratio": -0.7312092781066895, "logits/chosen": -2.2011773586273193, "logits/rejected": -2.201080799102783, "logps/chosen": -1.5241130590438843, "logps/rejected": -1.4893730878829956, "loss": 1.793, "nll_loss": 1.719857931137085, "rewards/accuracies": 0.5, "rewards/chosen": -0.1524112969636917, "rewards/margins": -0.003474000608548522, "rewards/rejected": -0.14893729984760284, "step": 7 }, { "epoch": 0.011688697121201744, "grad_norm": 1.520970106124878, "learning_rate": 9.9999461653818e-06, "log_odds_chosen": -0.08909691870212555, "log_odds_ratio": -0.7692862153053284, "logits/chosen": -2.243410110473633, "logits/rejected": -2.2267463207244873, "logps/chosen": -1.446630597114563, "logps/rejected": -1.396761417388916, "loss": 1.759, "nll_loss": 1.682105541229248, "rewards/accuracies": 0.390625, "rewards/chosen": -0.14466306567192078, "rewards/margins": -0.00498693622648716, "rewards/rejected": -0.13967613875865936, "step": 8 }, { "epoch": 0.013149784261351962, "grad_norm": 1.2674661874771118, "learning_rate": 9.999784662686462e-06, "log_odds_chosen": 0.03795226663351059, "log_odds_ratio": -0.7072824835777283, "logits/chosen": -2.1986892223358154, "logits/rejected": -2.2072536945343018, "logps/chosen": -1.4396700859069824, "logps/rejected": -1.471963882446289, "loss": 1.7775, "nll_loss": 1.7067829370498657, "rewards/accuracies": 0.4375, "rewards/chosen": -0.14396700263023376, "rewards/margins": 0.0032293866388499737, "rewards/rejected": -0.14719641208648682, "step": 9 }, { "epoch": 0.01461087140150218, "grad_norm": 0.984953761100769, "learning_rate": 9.999515495391765e-06, "log_odds_chosen": -0.1402886062860489, "log_odds_ratio": -0.7837027907371521, "logits/chosen": -2.173368453979492, "logits/rejected": -2.152172803878784, "logps/chosen": -1.3456827402114868, "logps/rejected": -1.2599360942840576, "loss": 1.6237, "nll_loss": 1.5452890396118164, "rewards/accuracies": 0.375, "rewards/chosen": -0.13456827402114868, "rewards/margins": -0.008574655279517174, "rewards/rejected": -0.12599360942840576, "step": 10 }, { "epoch": 0.016071958541652397, "grad_norm": 0.9352578520774841, "learning_rate": 9.999138669293913e-06, "log_odds_chosen": 0.030955376103520393, "log_odds_ratio": -0.6955101490020752, "logits/chosen": -2.242959976196289, "logits/rejected": -2.2378480434417725, "logps/chosen": -1.3454885482788086, "logps/rejected": -1.3571863174438477, "loss": 1.6984, "nll_loss": 1.6288717985153198, "rewards/accuracies": 0.453125, "rewards/chosen": -0.1345488578081131, "rewards/margins": 0.0011697756126523018, "rewards/rejected": -0.13571862876415253, "step": 11 }, { "epoch": 0.017533045681802615, "grad_norm": 0.7604814171791077, "learning_rate": 9.998654192507421e-06, "log_odds_chosen": -0.025142917409539223, "log_odds_ratio": -0.7221760153770447, "logits/chosen": -2.1557188034057617, "logits/rejected": -2.181380271911621, "logps/chosen": -1.3569337129592896, "logps/rejected": -1.342216968536377, "loss": 1.6662, "nll_loss": 1.59402596950531, "rewards/accuracies": 0.4375, "rewards/chosen": -0.13569337129592896, "rewards/margins": -0.0014716808218508959, "rewards/rejected": -0.13422170281410217, "step": 12 }, { "epoch": 0.018994132821952833, "grad_norm": 0.7423222064971924, "learning_rate": 9.998062075464943e-06, "log_odds_chosen": -0.11804741621017456, "log_odds_ratio": -0.7873090505599976, "logits/chosen": -2.200981616973877, "logits/rejected": -2.1934943199157715, "logps/chosen": -1.2868025302886963, "logps/rejected": -1.2002918720245361, "loss": 1.6073, "nll_loss": 1.5285530090332031, "rewards/accuracies": 0.3125, "rewards/chosen": -0.1286802589893341, "rewards/margins": -0.008651047013700008, "rewards/rejected": -0.12002921104431152, "step": 13 }, { "epoch": 0.020455219962103054, "grad_norm": 0.6683437824249268, "learning_rate": 9.997362330917032e-06, "log_odds_chosen": -0.012934267520904541, "log_odds_ratio": -0.7275698184967041, "logits/chosen": -2.1948914527893066, "logits/rejected": -2.200413227081299, "logps/chosen": -1.3400592803955078, "logps/rejected": -1.3343003988265991, "loss": 1.6521, "nll_loss": 1.5793243646621704, "rewards/accuracies": 0.453125, "rewards/chosen": -0.13400591909885406, "rewards/margins": -0.0005758859915658832, "rewards/rejected": -0.13343004882335663, "step": 14 }, { "epoch": 0.02191630710225327, "grad_norm": 0.6963202357292175, "learning_rate": 9.996554973931884e-06, "log_odds_chosen": -0.11072862148284912, "log_odds_ratio": -0.7919189929962158, "logits/chosen": -2.1773252487182617, "logits/rejected": -2.162838935852051, "logps/chosen": -1.305843710899353, "logps/rejected": -1.2427345514297485, "loss": 1.6167, "nll_loss": 1.5374785661697388, "rewards/accuracies": 0.359375, "rewards/chosen": -0.13058437407016754, "rewards/margins": -0.006310915574431419, "rewards/rejected": -0.12427344918251038, "step": 15 }, { "epoch": 0.02337739424240349, "grad_norm": 0.6812885999679565, "learning_rate": 9.995640021894996e-06, "log_odds_chosen": -0.03462236002087593, "log_odds_ratio": -0.7293962836265564, "logits/chosen": -2.1697518825531006, "logits/rejected": -2.1682655811309814, "logps/chosen": -1.2563740015029907, "logps/rejected": -1.2293565273284912, "loss": 1.5666, "nll_loss": 1.4936596155166626, "rewards/accuracies": 0.40625, "rewards/chosen": -0.12563739717006683, "rewards/margins": -0.0027017316315323114, "rewards/rejected": -0.12293566763401031, "step": 16 }, { "epoch": 0.024838481382553707, "grad_norm": 0.6587896943092346, "learning_rate": 9.994617494508811e-06, "log_odds_chosen": -0.010003458708524704, "log_odds_ratio": -0.7430539727210999, "logits/chosen": -2.200150966644287, "logits/rejected": -2.1906399726867676, "logps/chosen": -1.2153267860412598, "logps/rejected": -1.1962745189666748, "loss": 1.5641, "nll_loss": 1.4898183345794678, "rewards/accuracies": 0.390625, "rewards/chosen": -0.12153266370296478, "rewards/margins": -0.001905218348838389, "rewards/rejected": -0.119627445936203, "step": 17 }, { "epoch": 0.026299568522703924, "grad_norm": 0.5830309987068176, "learning_rate": 9.993487413792276e-06, "log_odds_chosen": -0.1340780407190323, "log_odds_ratio": -0.7850849628448486, "logits/chosen": -2.1663918495178223, "logits/rejected": -2.145009756088257, "logps/chosen": -1.2580608129501343, "logps/rejected": -1.188957929611206, "loss": 1.5591, "nll_loss": 1.4806358814239502, "rewards/accuracies": 0.34375, "rewards/chosen": -0.1258060783147812, "rewards/margins": -0.006910297088325024, "rewards/rejected": -0.11889579892158508, "step": 18 }, { "epoch": 0.027760655662854142, "grad_norm": 0.5536736249923706, "learning_rate": 9.992249804080372e-06, "log_odds_chosen": -0.021063022315502167, "log_odds_ratio": -0.7351462244987488, "logits/chosen": -2.1682121753692627, "logits/rejected": -2.163313150405884, "logps/chosen": -1.2965900897979736, "logps/rejected": -1.2690778970718384, "loss": 1.6294, "nll_loss": 1.5559338331222534, "rewards/accuracies": 0.421875, "rewards/chosen": -0.1296590119600296, "rewards/margins": -0.0027512230444699526, "rewards/rejected": -0.12690778076648712, "step": 19 }, { "epoch": 0.02922174280300436, "grad_norm": 0.5634390711784363, "learning_rate": 9.990904692023604e-06, "log_odds_chosen": 0.025263303890824318, "log_odds_ratio": -0.7174615263938904, "logits/chosen": -2.26141357421875, "logits/rejected": -2.250270366668701, "logps/chosen": -1.3806607723236084, "logps/rejected": -1.3875806331634521, "loss": 1.7203, "nll_loss": 1.6485247611999512, "rewards/accuracies": 0.375, "rewards/chosen": -0.13806606829166412, "rewards/margins": 0.0006919947918504477, "rewards/rejected": -0.1387580782175064, "step": 20 }, { "epoch": 0.030682829943154577, "grad_norm": 0.5434465408325195, "learning_rate": 9.989452106587406e-06, "log_odds_chosen": 0.012862562201917171, "log_odds_ratio": -0.7206666469573975, "logits/chosen": -2.1785006523132324, "logits/rejected": -2.181265354156494, "logps/chosen": -1.2523807287216187, "logps/rejected": -1.2531023025512695, "loss": 1.5781, "nll_loss": 1.5060465335845947, "rewards/accuracies": 0.421875, "rewards/chosen": -0.1252380609512329, "rewards/margins": 7.216550875455141e-05, "rewards/rejected": -0.1253102421760559, "step": 21 }, { "epoch": 0.032143917083304795, "grad_norm": 0.6211642026901245, "learning_rate": 9.987892079051531e-06, "log_odds_chosen": 0.012624351307749748, "log_odds_ratio": -0.7291357517242432, "logits/chosen": -2.179482936859131, "logits/rejected": -2.176547050476074, "logps/chosen": -1.3581693172454834, "logps/rejected": -1.3609216213226318, "loss": 1.6565, "nll_loss": 1.5835901498794556, "rewards/accuracies": 0.359375, "rewards/chosen": -0.13581693172454834, "rewards/margins": 0.0002752433065325022, "rewards/rejected": -0.1360921710729599, "step": 22 }, { "epoch": 0.03360500422345501, "grad_norm": 0.769701361656189, "learning_rate": 9.986224643009375e-06, "log_odds_chosen": 0.03776140883564949, "log_odds_ratio": -0.6954489946365356, "logits/chosen": -2.1131434440612793, "logits/rejected": -2.1170055866241455, "logps/chosen": -1.2192529439926147, "logps/rejected": -1.2428308725357056, "loss": 1.519, "nll_loss": 1.4494192600250244, "rewards/accuracies": 0.453125, "rewards/chosen": -0.12192529439926147, "rewards/margins": 0.0023577904794365168, "rewards/rejected": -0.1242830902338028, "step": 23 }, { "epoch": 0.03506609136360523, "grad_norm": 0.6579698324203491, "learning_rate": 9.984449834367251e-06, "log_odds_chosen": -0.08287765085697174, "log_odds_ratio": -0.7655948400497437, "logits/chosen": -2.117600440979004, "logits/rejected": -2.118288993835449, "logps/chosen": -1.3066436052322388, "logps/rejected": -1.2814935445785522, "loss": 1.5987, "nll_loss": 1.5221776962280273, "rewards/accuracies": 0.46875, "rewards/chosen": -0.13066436350345612, "rewards/margins": -0.0025150016881525517, "rewards/rejected": -0.1281493604183197, "step": 24 }, { "epoch": 0.03652717850375545, "grad_norm": 0.5370662808418274, "learning_rate": 9.982567691343617e-06, "log_odds_chosen": -0.041301436722278595, "log_odds_ratio": -0.7494814395904541, "logits/chosen": -2.1119844913482666, "logits/rejected": -2.1228156089782715, "logps/chosen": -1.2678842544555664, "logps/rejected": -1.2463194131851196, "loss": 1.5457, "nll_loss": 1.4707541465759277, "rewards/accuracies": 0.34375, "rewards/chosen": -0.1267884224653244, "rewards/margins": -0.0021564930211752653, "rewards/rejected": -0.12463192641735077, "step": 25 }, { "epoch": 0.037988265643905665, "grad_norm": 0.5143423080444336, "learning_rate": 9.980578254468252e-06, "log_odds_chosen": -0.05193250998854637, "log_odds_ratio": -0.7482390403747559, "logits/chosen": -2.1540181636810303, "logits/rejected": -2.136885166168213, "logps/chosen": -1.3168916702270508, "logps/rejected": -1.2728286981582642, "loss": 1.6123, "nll_loss": 1.5374691486358643, "rewards/accuracies": 0.40625, "rewards/chosen": -0.1316891610622406, "rewards/margins": -0.004406292457133532, "rewards/rejected": -0.12728287279605865, "step": 26 }, { "epoch": 0.03944935278405589, "grad_norm": 0.5746509432792664, "learning_rate": 9.978481566581388e-06, "log_odds_chosen": -0.09941092133522034, "log_odds_ratio": -0.7670192122459412, "logits/chosen": -2.0658328533172607, "logits/rejected": -2.053611993789673, "logps/chosen": -1.3229994773864746, "logps/rejected": -1.2689064741134644, "loss": 1.5754, "nll_loss": 1.4986768960952759, "rewards/accuracies": 0.265625, "rewards/chosen": -0.13229995965957642, "rewards/margins": -0.005409288220107555, "rewards/rejected": -0.1268906593322754, "step": 27 }, { "epoch": 0.04091043992420611, "grad_norm": 0.5285203456878662, "learning_rate": 9.976277672832782e-06, "log_odds_chosen": -0.009130319580435753, "log_odds_ratio": -0.7242738008499146, "logits/chosen": -2.119077682495117, "logits/rejected": -2.109398365020752, "logps/chosen": -1.311572790145874, "logps/rejected": -1.3026206493377686, "loss": 1.596, "nll_loss": 1.5235313177108765, "rewards/accuracies": 0.28125, "rewards/chosen": -0.1311572790145874, "rewards/margins": -0.0008952060015872121, "rewards/rejected": -0.1302620768547058, "step": 28 }, { "epoch": 0.042371527064356325, "grad_norm": 0.5054935812950134, "learning_rate": 9.973966620680741e-06, "log_odds_chosen": 9.676720947027206e-05, "log_odds_ratio": -0.7112984657287598, "logits/chosen": -2.0804967880249023, "logits/rejected": -2.0964128971099854, "logps/chosen": -1.3590530157089233, "logps/rejected": -1.3668309450149536, "loss": 1.6081, "nll_loss": 1.5369728803634644, "rewards/accuracies": 0.46875, "rewards/chosen": -0.13590531051158905, "rewards/margins": 0.0007777921855449677, "rewards/rejected": -0.13668310642242432, "step": 29 }, { "epoch": 0.04383261420450654, "grad_norm": 0.5947529077529907, "learning_rate": 9.971548459891113e-06, "log_odds_chosen": -0.005686625838279724, "log_odds_ratio": -0.7236043810844421, "logits/chosen": -2.040478229522705, "logits/rejected": -2.050001859664917, "logps/chosen": -1.3079044818878174, "logps/rejected": -1.3112431764602661, "loss": 1.6105, "nll_loss": 1.5381548404693604, "rewards/accuracies": 0.390625, "rewards/chosen": -0.13079045712947845, "rewards/margins": 0.0003338647074997425, "rewards/rejected": -0.1311243176460266, "step": 30 }, { "epoch": 0.04529370134465676, "grad_norm": 0.541822075843811, "learning_rate": 9.969023242536206e-06, "log_odds_chosen": -0.03166639059782028, "log_odds_ratio": -0.7292428612709045, "logits/chosen": -2.040898323059082, "logits/rejected": -2.0398669242858887, "logps/chosen": -1.2686175107955933, "logps/rejected": -1.261348843574524, "loss": 1.541, "nll_loss": 1.4681065082550049, "rewards/accuracies": 0.484375, "rewards/chosen": -0.12686176598072052, "rewards/margins": -0.0007268765475600958, "rewards/rejected": -0.12613488733768463, "step": 31 }, { "epoch": 0.04675478848480698, "grad_norm": 0.6896752715110779, "learning_rate": 9.966391022993658e-06, "log_odds_chosen": 0.13857628405094147, "log_odds_ratio": -0.6746503114700317, "logits/chosen": -2.005702495574951, "logits/rejected": -2.009239912033081, "logps/chosen": -1.14279043674469, "logps/rejected": -1.2037732601165771, "loss": 1.475, "nll_loss": 1.4074950218200684, "rewards/accuracies": 0.484375, "rewards/chosen": -0.11427903920412064, "rewards/margins": 0.00609829043969512, "rewards/rejected": -0.12037733197212219, "step": 32 }, { "epoch": 0.048215875624957195, "grad_norm": 0.4958839416503906, "learning_rate": 9.963651857945286e-06, "log_odds_chosen": -0.07793000340461731, "log_odds_ratio": -0.7607824206352234, "logits/chosen": -2.067959785461426, "logits/rejected": -2.06540584564209, "logps/chosen": -1.3159579038619995, "logps/rejected": -1.2743453979492188, "loss": 1.657, "nll_loss": 1.5809520483016968, "rewards/accuracies": 0.390625, "rewards/chosen": -0.13159577548503876, "rewards/margins": -0.004161248914897442, "rewards/rejected": -0.12743453681468964, "step": 33 }, { "epoch": 0.04967696276510741, "grad_norm": 0.530023992061615, "learning_rate": 9.960805806375854e-06, "log_odds_chosen": 0.017400667071342468, "log_odds_ratio": -0.7287170886993408, "logits/chosen": -2.049931526184082, "logits/rejected": -2.067579984664917, "logps/chosen": -1.313307285308838, "logps/rejected": -1.3060802221298218, "loss": 1.5791, "nll_loss": 1.506237268447876, "rewards/accuracies": 0.390625, "rewards/chosen": -0.1313307285308838, "rewards/margins": -0.0007226967136375606, "rewards/rejected": -0.13060802221298218, "step": 34 }, { "epoch": 0.05113804990525763, "grad_norm": 0.5267317891120911, "learning_rate": 9.957852929571799e-06, "log_odds_chosen": -0.08755096048116684, "log_odds_ratio": -0.7537316679954529, "logits/chosen": -2.0170559883117676, "logits/rejected": -2.0241918563842773, "logps/chosen": -1.3282378911972046, "logps/rejected": -1.267946481704712, "loss": 1.5959, "nll_loss": 1.5205148458480835, "rewards/accuracies": 0.375, "rewards/chosen": -0.13282378017902374, "rewards/margins": -0.006029147654771805, "rewards/rejected": -0.12679465115070343, "step": 35 }, { "epoch": 0.05259913704540785, "grad_norm": 0.5598548054695129, "learning_rate": 9.954793291119917e-06, "log_odds_chosen": -0.07335270196199417, "log_odds_ratio": -0.7724658846855164, "logits/chosen": -2.0005574226379395, "logits/rejected": -1.985095739364624, "logps/chosen": -1.2228074073791504, "logps/rejected": -1.1555684804916382, "loss": 1.5164, "nll_loss": 1.4391915798187256, "rewards/accuracies": 0.359375, "rewards/chosen": -0.12228074669837952, "rewards/margins": -0.006723896134644747, "rewards/rejected": -0.11555685847997665, "step": 36 }, { "epoch": 0.054060224185558066, "grad_norm": 0.5158045887947083, "learning_rate": 9.951626956906001e-06, "log_odds_chosen": -0.1070481538772583, "log_odds_ratio": -0.767937421798706, "logits/chosen": -2.022592067718506, "logits/rejected": -2.0269908905029297, "logps/chosen": -1.3267402648925781, "logps/rejected": -1.2489224672317505, "loss": 1.5828, "nll_loss": 1.5060540437698364, "rewards/accuracies": 0.390625, "rewards/chosen": -0.13267403841018677, "rewards/margins": -0.0077817970886826515, "rewards/rejected": -0.1248922348022461, "step": 37 }, { "epoch": 0.055521311325708284, "grad_norm": 0.5369778275489807, "learning_rate": 9.948353995113405e-06, "log_odds_chosen": 0.02357397973537445, "log_odds_ratio": -0.7277624011039734, "logits/chosen": -1.971970796585083, "logits/rejected": -2.0005533695220947, "logps/chosen": -1.190409541130066, "logps/rejected": -1.1846762895584106, "loss": 1.4772, "nll_loss": 1.404414176940918, "rewards/accuracies": 0.375, "rewards/chosen": -0.11904095858335495, "rewards/margins": -0.0005733318976126611, "rewards/rejected": -0.11846762895584106, "step": 38 }, { "epoch": 0.0569823984658585, "grad_norm": 0.5577079057693481, "learning_rate": 9.944974476221587e-06, "log_odds_chosen": 0.05960956960916519, "log_odds_ratio": -0.694072425365448, "logits/chosen": -2.0205910205841064, "logits/rejected": -2.010025978088379, "logps/chosen": -1.2545329332351685, "logps/rejected": -1.2769423723220825, "loss": 1.5708, "nll_loss": 1.5014022588729858, "rewards/accuracies": 0.46875, "rewards/chosen": -0.12545329332351685, "rewards/margins": 0.002240956760942936, "rewards/rejected": -0.1276942491531372, "step": 39 }, { "epoch": 0.05844348560600872, "grad_norm": 0.5200385451316833, "learning_rate": 9.941488473004594e-06, "log_odds_chosen": -0.016573341563344002, "log_odds_ratio": -0.7171596884727478, "logits/chosen": -2.0057106018066406, "logits/rejected": -2.004528760910034, "logps/chosen": -1.4052870273590088, "logps/rejected": -1.3805549144744873, "loss": 1.6515, "nll_loss": 1.5797675848007202, "rewards/accuracies": 0.375, "rewards/chosen": -0.14052869379520416, "rewards/margins": -0.0024731969460844994, "rewards/rejected": -0.13805550336837769, "step": 40 }, { "epoch": 0.059904572746158936, "grad_norm": 0.5273575186729431, "learning_rate": 9.937896060529485e-06, "log_odds_chosen": -0.08113294094800949, "log_odds_ratio": -0.7743253707885742, "logits/chosen": -1.9785962104797363, "logits/rejected": -1.9705551862716675, "logps/chosen": -1.2821004390716553, "logps/rejected": -1.208033561706543, "loss": 1.5836, "nll_loss": 1.5061376094818115, "rewards/accuracies": 0.234375, "rewards/chosen": -0.12821003794670105, "rewards/margins": -0.007406666409224272, "rewards/rejected": -0.12080337107181549, "step": 41 }, { "epoch": 0.061365659886309154, "grad_norm": 0.5236508250236511, "learning_rate": 9.934197316154721e-06, "log_odds_chosen": -0.13003796339035034, "log_odds_ratio": -0.7939882278442383, "logits/chosen": -1.9101815223693848, "logits/rejected": -1.943664312362671, "logps/chosen": -1.155348777770996, "logps/rejected": -1.0918152332305908, "loss": 1.4569, "nll_loss": 1.377458095550537, "rewards/accuracies": 0.328125, "rewards/chosen": -0.11553487926721573, "rewards/margins": -0.006353363860398531, "rewards/rejected": -0.10918151587247849, "step": 42 }, { "epoch": 0.06282674702645938, "grad_norm": 0.4699207842350006, "learning_rate": 9.9303923195285e-06, "log_odds_chosen": -0.041184213012456894, "log_odds_ratio": -0.7406144142150879, "logits/chosen": -1.9506869316101074, "logits/rejected": -1.9735560417175293, "logps/chosen": -1.2942121028900146, "logps/rejected": -1.2584986686706543, "loss": 1.5464, "nll_loss": 1.4723409414291382, "rewards/accuracies": 0.4375, "rewards/chosen": -0.12942121922969818, "rewards/margins": -0.0035713440738618374, "rewards/rejected": -0.1258498728275299, "step": 43 }, { "epoch": 0.06428783416660959, "grad_norm": 0.5235589742660522, "learning_rate": 9.92648115258704e-06, "log_odds_chosen": 0.04650488868355751, "log_odds_ratio": -0.7010443806648254, "logits/chosen": -1.9501118659973145, "logits/rejected": -1.9561800956726074, "logps/chosen": -1.1499981880187988, "logps/rejected": -1.1835464239120483, "loss": 1.4224, "nll_loss": 1.3523142337799072, "rewards/accuracies": 0.4375, "rewards/chosen": -0.11499983072280884, "rewards/margins": 0.003354821354150772, "rewards/rejected": -0.11835464835166931, "step": 44 } ], "logging_steps": 1.0, "max_steps": 684, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }