{ "best_metric": 0.8980213403701782, "best_model_checkpoint": "./mistral-7b-v0.1-orpo/checkpoint-211", "epoch": 3.0, "eval_steps": 500, "global_step": 633, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 39.94051543947401, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.2551603317260742, "log_odds_ratio": -0.7050353288650513, "logits/chosen": -2.8500335216522217, "logits/rejected": -2.8525497913360596, "logps/chosen": -1.2754234075546265, "logps/rejected": -1.5109716653823853, "loss": 1.1661, "nll_loss": 1.1492516994476318, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06377117335796356, "rewards/margins": 0.011777402833104134, "rewards/rejected": -0.07554857432842255, "step": 10 }, { "epoch": 0.09, "grad_norm": 15.606195494341659, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.19387400150299072, "log_odds_ratio": -0.7067446708679199, "logits/chosen": -2.8987910747528076, "logits/rejected": -2.9010841846466064, "logps/chosen": -0.9522870779037476, "logps/rejected": -1.0993469953536987, "loss": 0.9734, "nll_loss": 0.9094793200492859, "rewards/accuracies": 0.5, "rewards/chosen": -0.047614358365535736, "rewards/margins": 0.0073529938235878944, "rewards/rejected": -0.054967351257801056, "step": 20 }, { "epoch": 0.14, "grad_norm": 10.36315915115552, "learning_rate": 1.5e-06, "log_odds_chosen": 0.21015071868896484, "log_odds_ratio": -0.6918584108352661, "logits/chosen": -2.9974679946899414, "logits/rejected": -2.9977316856384277, "logps/chosen": -0.9473606944084167, "logps/rejected": -1.0642311573028564, "loss": 0.9344, "nll_loss": 0.8887130618095398, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.047368038445711136, "rewards/margins": 0.005843517370522022, "rewards/rejected": -0.05321155861020088, "step": 30 }, { "epoch": 0.19, "grad_norm": 12.001709174217318, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.10945004224777222, "log_odds_ratio": -0.7249928712844849, "logits/chosen": -2.8161749839782715, "logits/rejected": -2.8172993659973145, "logps/chosen": -0.9179791212081909, "logps/rejected": -0.9879155158996582, "loss": 0.9444, "nll_loss": 0.8832181692123413, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.04589895159006119, "rewards/margins": 0.003496820107102394, "rewards/rejected": -0.04939577355980873, "step": 40 }, { "epoch": 0.24, "grad_norm": 12.504768280715709, "learning_rate": 2.5e-06, "log_odds_chosen": 0.4096715450286865, "log_odds_ratio": -0.6319375038146973, "logits/chosen": -2.701756000518799, "logits/rejected": -2.713608980178833, "logps/chosen": -0.8992551565170288, "logps/rejected": -1.1893055438995361, "loss": 0.9242, "nll_loss": 0.9320483207702637, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.04496275633573532, "rewards/margins": 0.014502523466944695, "rewards/rejected": -0.059465277940034866, "step": 50 }, { "epoch": 0.28, "grad_norm": 9.02387893392391, "learning_rate": 3e-06, "log_odds_chosen": 0.2635055184364319, "log_odds_ratio": -0.6361549496650696, "logits/chosen": -2.6746716499328613, "logits/rejected": -2.6736464500427246, "logps/chosen": -0.7554010152816772, "logps/rejected": -0.9083536863327026, "loss": 0.8533, "nll_loss": 0.7264706492424011, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03777005523443222, "rewards/margins": 0.007647632621228695, "rewards/rejected": -0.04541768878698349, "step": 60 }, { "epoch": 0.33, "grad_norm": 10.698033738270377, "learning_rate": 3.5e-06, "log_odds_chosen": 0.30966854095458984, "log_odds_ratio": -0.654030442237854, "logits/chosen": -2.7319040298461914, "logits/rejected": -2.737243175506592, "logps/chosen": -0.84112948179245, "logps/rejected": -1.0554386377334595, "loss": 0.9031, "nll_loss": 0.8661483526229858, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.042056478559970856, "rewards/margins": 0.010715454816818237, "rewards/rejected": -0.05277193710207939, "step": 70 }, { "epoch": 0.38, "grad_norm": 9.996385073585538, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": -0.006746725644916296, "log_odds_ratio": -0.775671124458313, "logits/chosen": -2.8110978603363037, "logits/rejected": -2.812554121017456, "logps/chosen": -0.8807821273803711, "logps/rejected": -0.883065402507782, "loss": 0.8813, "nll_loss": 0.9144401550292969, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.044039104133844376, "rewards/margins": 0.00011416729830671102, "rewards/rejected": -0.04415327310562134, "step": 80 }, { "epoch": 0.43, "grad_norm": 9.119674780883251, "learning_rate": 4.5e-06, "log_odds_chosen": 0.2785256505012512, "log_odds_ratio": -0.6412050127983093, "logits/chosen": -2.8578667640686035, "logits/rejected": -2.843656063079834, "logps/chosen": -0.8698889017105103, "logps/rejected": -1.0659993886947632, "loss": 0.8932, "nll_loss": 0.8667906522750854, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04349444434046745, "rewards/margins": 0.009805524721741676, "rewards/rejected": -0.05329997092485428, "step": 90 }, { "epoch": 0.47, "grad_norm": 12.306871572257608, "learning_rate": 5e-06, "log_odds_chosen": 0.3695891201496124, "log_odds_ratio": -0.6482647061347961, "logits/chosen": -2.7652063369750977, "logits/rejected": -2.789933681488037, "logps/chosen": -0.878996729850769, "logps/rejected": -1.082092046737671, "loss": 0.9352, "nll_loss": 0.9471464157104492, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.04394983872771263, "rewards/margins": 0.01015476230531931, "rewards/rejected": -0.054104603826999664, "step": 100 }, { "epoch": 0.52, "grad_norm": 8.587658638329193, "learning_rate": 4.995658601978859e-06, "log_odds_chosen": 0.3208955228328705, "log_odds_ratio": -0.624093234539032, "logits/chosen": -2.75911808013916, "logits/rejected": -2.7485742568969727, "logps/chosen": -0.7625306844711304, "logps/rejected": -0.950202465057373, "loss": 0.9039, "nll_loss": 0.8189178705215454, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03812653571367264, "rewards/margins": 0.009383587166666985, "rewards/rejected": -0.04751012474298477, "step": 110 }, { "epoch": 0.57, "grad_norm": 9.573968736057266, "learning_rate": 4.982649486104858e-06, "log_odds_chosen": 0.2796480059623718, "log_odds_ratio": -0.6660342216491699, "logits/chosen": -2.8325819969177246, "logits/rejected": -2.830364942550659, "logps/chosen": -0.8180511593818665, "logps/rejected": -0.9945104718208313, "loss": 0.9465, "nll_loss": 0.8578864932060242, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.040902554988861084, "rewards/margins": 0.008822967298328876, "rewards/rejected": -0.049725525081157684, "step": 120 }, { "epoch": 0.62, "grad_norm": 10.453345314275992, "learning_rate": 4.961017834577927e-06, "log_odds_chosen": 0.2720031440258026, "log_odds_ratio": -0.656061053276062, "logits/chosen": -2.846191644668579, "logits/rejected": -2.8502097129821777, "logps/chosen": -0.8261640667915344, "logps/rejected": -0.9698032140731812, "loss": 0.9689, "nll_loss": 0.9089719653129578, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0413082018494606, "rewards/margins": 0.007181968539953232, "rewards/rejected": -0.048490170389413834, "step": 130 }, { "epoch": 0.66, "grad_norm": 8.571550283980045, "learning_rate": 4.930838776685373e-06, "log_odds_chosen": 0.30521708726882935, "log_odds_ratio": -0.6360923647880554, "logits/chosen": -2.8390443325042725, "logits/rejected": -2.8424527645111084, "logps/chosen": -0.8498956561088562, "logps/rejected": -1.071276068687439, "loss": 0.9233, "nll_loss": 0.8490797281265259, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04249478504061699, "rewards/margins": 0.011069012805819511, "rewards/rejected": -0.05356380343437195, "step": 140 }, { "epoch": 0.71, "grad_norm": 9.244452429837853, "learning_rate": 4.8922171278689655e-06, "log_odds_chosen": 0.38359910249710083, "log_odds_ratio": -0.6358404159545898, "logits/chosen": -2.866654396057129, "logits/rejected": -2.858891487121582, "logps/chosen": -0.8682330250740051, "logps/rejected": -1.1270641088485718, "loss": 0.9085, "nll_loss": 0.8730288743972778, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04341164976358414, "rewards/margins": 0.01294155977666378, "rewards/rejected": -0.056353211402893066, "step": 150 }, { "epoch": 0.76, "grad_norm": 10.379571331754194, "learning_rate": 4.845287025688503e-06, "log_odds_chosen": 0.29743850231170654, "log_odds_ratio": -0.650119423866272, "logits/chosen": -2.8795416355133057, "logits/rejected": -2.881042957305908, "logps/chosen": -0.8429490923881531, "logps/rejected": -1.049843668937683, "loss": 0.8767, "nll_loss": 0.8780015110969543, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04214745759963989, "rewards/margins": 0.010344731621444225, "rewards/rejected": -0.052492182701826096, "step": 160 }, { "epoch": 0.81, "grad_norm": 8.62227952312482, "learning_rate": 4.790211463946174e-06, "log_odds_chosen": 0.10580413043498993, "log_odds_ratio": -0.7094690203666687, "logits/chosen": -2.745680093765259, "logits/rejected": -2.751802921295166, "logps/chosen": -0.8849539756774902, "logps/rejected": -0.9533231854438782, "loss": 0.8963, "nll_loss": 0.8941303491592407, "rewards/accuracies": 0.5625, "rewards/chosen": -0.044247694313526154, "rewards/margins": 0.0034184574615210295, "rewards/rejected": -0.04766615480184555, "step": 170 }, { "epoch": 0.85, "grad_norm": 10.04192028153657, "learning_rate": 4.727181726589789e-06, "log_odds_chosen": 0.1737401783466339, "log_odds_ratio": -0.7261314988136292, "logits/chosen": -2.7415287494659424, "logits/rejected": -2.7601096630096436, "logps/chosen": -0.8834375143051147, "logps/rejected": -1.0135564804077148, "loss": 0.9263, "nll_loss": 0.9143431782722473, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.044171880930662155, "rewards/margins": 0.006505946163088083, "rewards/rejected": -0.0506778284907341, "step": 180 }, { "epoch": 0.9, "grad_norm": 9.615534174630943, "learning_rate": 4.656416723360974e-06, "log_odds_chosen": 0.24925272166728973, "log_odds_ratio": -0.6496003866195679, "logits/chosen": -2.6970863342285156, "logits/rejected": -2.6921534538269043, "logps/chosen": -0.8308185338973999, "logps/rejected": -0.9803632497787476, "loss": 0.9089, "nll_loss": 0.8415037393569946, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04154093191027641, "rewards/margins": 0.0074772341176867485, "rewards/rejected": -0.04901815950870514, "step": 190 }, { "epoch": 0.95, "grad_norm": 9.799715635757147, "learning_rate": 4.578162229495714e-06, "log_odds_chosen": 0.17407485842704773, "log_odds_ratio": -0.6980692148208618, "logits/chosen": -2.762956142425537, "logits/rejected": -2.768340826034546, "logps/chosen": -0.8834056854248047, "logps/rejected": -1.005025863647461, "loss": 0.9565, "nll_loss": 0.9161331057548523, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.044170282781124115, "rewards/margins": 0.006081007421016693, "rewards/rejected": -0.05025129392743111, "step": 200 }, { "epoch": 1.0, "grad_norm": 9.940266165525735, "learning_rate": 4.492690032117859e-06, "log_odds_chosen": 0.1925085484981537, "log_odds_ratio": -0.6957636475563049, "logits/chosen": -2.753004550933838, "logits/rejected": -2.742924690246582, "logps/chosen": -0.9696134328842163, "logps/rejected": -1.1040910482406616, "loss": 0.974, "nll_loss": 0.9876837730407715, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.04848066717386246, "rewards/margins": 0.006723886821419001, "rewards/rejected": -0.05520455911755562, "step": 210 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.35508599877357483, "eval_log_odds_ratio": -0.6352247595787048, "eval_logits/chosen": -2.751180410385132, "eval_logits/rejected": -2.754164934158325, "eval_logps/chosen": -0.8390052914619446, "eval_logps/rejected": -1.0409260988235474, "eval_loss": 0.8980213403701782, "eval_nll_loss": 0.8887932896614075, "eval_rewards/accuracies": 0.6145833134651184, "eval_rewards/chosen": -0.04195027053356171, "eval_rewards/margins": 0.01009603776037693, "eval_rewards/rejected": -0.05204630270600319, "eval_runtime": 57.3212, "eval_samples_per_second": 13.084, "eval_steps_per_second": 0.419, "step": 211 }, { "epoch": 1.04, "grad_norm": 8.638509296818858, "learning_rate": 4.400296986290258e-06, "log_odds_chosen": 1.3975646495819092, "log_odds_ratio": -0.32538530230522156, "logits/chosen": -2.7828710079193115, "logits/rejected": -2.7821409702301025, "logps/chosen": -0.446835994720459, "logps/rejected": -1.1457499265670776, "loss": 0.5455, "nll_loss": 0.4835132956504822, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02234180085361004, "rewards/margins": 0.03494570404291153, "rewards/rejected": -0.05728749558329582, "step": 220 }, { "epoch": 1.09, "grad_norm": 9.543274613242575, "learning_rate": 4.3013039840019675e-06, "log_odds_chosen": 1.6737674474716187, "log_odds_ratio": -0.23729205131530762, "logits/chosen": -2.860888957977295, "logits/rejected": -2.84743332862854, "logps/chosen": -0.4813949167728424, "logps/rejected": -1.4018938541412354, "loss": 0.5041, "nll_loss": 0.5257013440132141, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02406974695622921, "rewards/margins": 0.04602494835853577, "rewards/rejected": -0.07009468972682953, "step": 230 }, { "epoch": 1.14, "grad_norm": 9.413079564715025, "learning_rate": 4.196054839672382e-06, "log_odds_chosen": 1.4506713151931763, "log_odds_ratio": -0.31961891055107117, "logits/chosen": -2.848227024078369, "logits/rejected": -2.849867820739746, "logps/chosen": -0.46944475173950195, "logps/rejected": -1.153623342514038, "loss": 0.4801, "nll_loss": 0.4581332802772522, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.023472238332033157, "rewards/margins": 0.03420892730355263, "rewards/rejected": -0.057681165635585785, "step": 240 }, { "epoch": 1.18, "grad_norm": 9.405658606526101, "learning_rate": 4.084915096043035e-06, "log_odds_chosen": 1.3965736627578735, "log_odds_ratio": -0.3138691484928131, "logits/chosen": -2.8238320350646973, "logits/rejected": -2.811922311782837, "logps/chosen": -0.48123639822006226, "logps/rejected": -1.1604976654052734, "loss": 0.5396, "nll_loss": 0.5218371748924255, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.024061817675828934, "rewards/margins": 0.03396306931972504, "rewards/rejected": -0.05802489444613457, "step": 250 }, { "epoch": 1.23, "grad_norm": 9.830258381762862, "learning_rate": 3.9682707546043785e-06, "log_odds_chosen": 1.5788991451263428, "log_odds_ratio": -0.28013017773628235, "logits/chosen": -2.7972917556762695, "logits/rejected": -2.7948007583618164, "logps/chosen": -0.4086516797542572, "logps/rejected": -1.1518056392669678, "loss": 0.4785, "nll_loss": 0.4458548128604889, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.02043258398771286, "rewards/margins": 0.03715769201517105, "rewards/rejected": -0.05759027600288391, "step": 260 }, { "epoch": 1.28, "grad_norm": 8.095849996024462, "learning_rate": 3.846526934966891e-06, "log_odds_chosen": 1.360436201095581, "log_odds_ratio": -0.33352774381637573, "logits/chosen": -2.7501392364501953, "logits/rejected": -2.7644224166870117, "logps/chosen": -0.43927937746047974, "logps/rejected": -1.0804184675216675, "loss": 0.4998, "nll_loss": 0.4916819632053375, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.021963967010378838, "rewards/margins": 0.03205695003271103, "rewards/rejected": -0.054020922631025314, "step": 270 }, { "epoch": 1.33, "grad_norm": 8.39958498646755, "learning_rate": 3.7201064678327008e-06, "log_odds_chosen": 1.620120644569397, "log_odds_ratio": -0.26243138313293457, "logits/chosen": -2.7666475772857666, "logits/rejected": -2.756240129470825, "logps/chosen": -0.4216586947441101, "logps/rejected": -1.1700552701950073, "loss": 0.4808, "nll_loss": 0.4861266016960144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021082933992147446, "rewards/margins": 0.03741983324289322, "rewards/rejected": -0.058502763509750366, "step": 280 }, { "epoch": 1.37, "grad_norm": 9.702907898591407, "learning_rate": 3.589448426454486e-06, "log_odds_chosen": 1.5312963724136353, "log_odds_ratio": -0.26756590604782104, "logits/chosen": -2.716158390045166, "logits/rejected": -2.7521204948425293, "logps/chosen": -0.4387860894203186, "logps/rejected": -1.1713616847991943, "loss": 0.4853, "nll_loss": 0.479191392660141, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02193930372595787, "rewards/margins": 0.036628786474466324, "rewards/rejected": -0.0585680827498436, "step": 290 }, { "epoch": 1.42, "grad_norm": 9.595382828818373, "learning_rate": 3.4550066016820755e-06, "log_odds_chosen": 1.752833604812622, "log_odds_ratio": -0.2656395733356476, "logits/chosen": -2.7655932903289795, "logits/rejected": -2.7632648944854736, "logps/chosen": -0.39629921317100525, "logps/rejected": -1.1531693935394287, "loss": 0.4794, "nll_loss": 0.4017585217952728, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.01981496252119541, "rewards/margins": 0.037843503057956696, "rewards/rejected": -0.05765846371650696, "step": 300 }, { "epoch": 1.47, "grad_norm": 8.842462954214794, "learning_rate": 3.317247925893089e-06, "log_odds_chosen": 1.7586959600448608, "log_odds_ratio": -0.24750974774360657, "logits/chosen": -2.7276859283447266, "logits/rejected": -2.7263400554656982, "logps/chosen": -0.40662258863449097, "logps/rejected": -1.34078848361969, "loss": 0.4871, "nll_loss": 0.43305450677871704, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.020331131294369698, "rewards/margins": 0.04670828953385353, "rewards/rejected": -0.06703941524028778, "step": 310 }, { "epoch": 1.52, "grad_norm": 8.573977166367447, "learning_rate": 3.1766508512814997e-06, "log_odds_chosen": 1.6913954019546509, "log_odds_ratio": -0.23067903518676758, "logits/chosen": -2.7266640663146973, "logits/rejected": -2.694443702697754, "logps/chosen": -0.4407864212989807, "logps/rejected": -1.3253118991851807, "loss": 0.4717, "nll_loss": 0.4706190228462219, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.022039320319890976, "rewards/margins": 0.044226277619600296, "rewards/rejected": -0.06626559793949127, "step": 320 }, { "epoch": 1.56, "grad_norm": 9.155832857330214, "learning_rate": 3.0337036881365045e-06, "log_odds_chosen": 1.499769926071167, "log_odds_ratio": -0.2850986421108246, "logits/chosen": -2.7671217918395996, "logits/rejected": -2.7773778438568115, "logps/chosen": -0.44799503684043884, "logps/rejected": -1.2192200422286987, "loss": 0.4549, "nll_loss": 0.49383026361465454, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.022399749606847763, "rewards/margins": 0.038561251014471054, "rewards/rejected": -0.060961008071899414, "step": 330 }, { "epoch": 1.61, "grad_norm": 8.972176715125862, "learning_rate": 2.888902908883069e-06, "log_odds_chosen": 1.6073827743530273, "log_odds_ratio": -0.25966399908065796, "logits/chosen": -2.729976177215576, "logits/rejected": -2.7247419357299805, "logps/chosen": -0.3783184289932251, "logps/rejected": -1.1157445907592773, "loss": 0.503, "nll_loss": 0.4228973388671875, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.018915923312306404, "rewards/margins": 0.03687131032347679, "rewards/rejected": -0.055787235498428345, "step": 340 }, { "epoch": 1.66, "grad_norm": 8.93142291410328, "learning_rate": 2.7427514237744e-06, "log_odds_chosen": 1.4399652481079102, "log_odds_ratio": -0.311657577753067, "logits/chosen": -2.764589786529541, "logits/rejected": -2.756528377532959, "logps/chosen": -0.469074547290802, "logps/rejected": -1.1970831155776978, "loss": 0.4785, "nll_loss": 0.4543870985507965, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0234537310898304, "rewards/margins": 0.03640042990446091, "rewards/rejected": -0.059854160994291306, "step": 350 }, { "epoch": 1.71, "grad_norm": 10.098847690798312, "learning_rate": 2.595756834225089e-06, "log_odds_chosen": 1.34963059425354, "log_odds_ratio": -0.3275851607322693, "logits/chosen": -2.7097485065460205, "logits/rejected": -2.73240327835083, "logps/chosen": -0.4670106768608093, "logps/rejected": -1.1147117614746094, "loss": 0.5122, "nll_loss": 0.5337762832641602, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.023350534960627556, "rewards/margins": 0.03238505870103836, "rewards/rejected": -0.055735595524311066, "step": 360 }, { "epoch": 1.75, "grad_norm": 8.104028348470598, "learning_rate": 2.448429669851285e-06, "log_odds_chosen": 1.480303168296814, "log_odds_ratio": -0.28436464071273804, "logits/chosen": -2.731992483139038, "logits/rejected": -2.7325727939605713, "logps/chosen": -0.4118678569793701, "logps/rejected": -1.0783556699752808, "loss": 0.4646, "nll_loss": 0.4363551139831543, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.020593393594026566, "rewards/margins": 0.03332439437508583, "rewards/rejected": -0.053917787969112396, "step": 370 }, { "epoch": 1.8, "grad_norm": 9.21288938005839, "learning_rate": 2.301281615340886e-06, "log_odds_chosen": 1.5375401973724365, "log_odds_ratio": -0.2999517321586609, "logits/chosen": -2.7647042274475098, "logits/rejected": -2.7518932819366455, "logps/chosen": -0.4663173258304596, "logps/rejected": -1.2805049419403076, "loss": 0.5002, "nll_loss": 0.48935002088546753, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.02331586554646492, "rewards/margins": 0.04070938378572464, "rewards/rejected": -0.06402524560689926, "step": 380 }, { "epoch": 1.85, "grad_norm": 8.485354651581948, "learning_rate": 2.154823733312027e-06, "log_odds_chosen": 1.4252521991729736, "log_odds_ratio": -0.2968854308128357, "logits/chosen": -2.723702907562256, "logits/rejected": -2.7239809036254883, "logps/chosen": -0.4829932153224945, "logps/rejected": -1.1818339824676514, "loss": 0.5363, "nll_loss": 0.524863600730896, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.024149660021066666, "rewards/margins": 0.03494204580783844, "rewards/rejected": -0.059091709554195404, "step": 390 }, { "epoch": 1.9, "grad_norm": 9.4111350259119, "learning_rate": 2.009564689332083e-06, "log_odds_chosen": 1.6615612506866455, "log_odds_ratio": -0.27174240350723267, "logits/chosen": -2.7479844093322754, "logits/rejected": -2.752495288848877, "logps/chosen": -0.42523106932640076, "logps/rejected": -1.2396804094314575, "loss": 0.4901, "nll_loss": 0.4686039984226227, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.02126155234873295, "rewards/margins": 0.04072246700525284, "rewards/rejected": -0.061984021216630936, "step": 400 }, { "epoch": 1.94, "grad_norm": 8.90962135731115, "learning_rate": 1.866008985261924e-06, "log_odds_chosen": 1.4581377506256104, "log_odds_ratio": -0.31245434284210205, "logits/chosen": -2.7534964084625244, "logits/rejected": -2.7579829692840576, "logps/chosen": -0.4420735239982605, "logps/rejected": -1.1250512599945068, "loss": 0.4719, "nll_loss": 0.4840390682220459, "rewards/accuracies": 0.875, "rewards/chosen": -0.022103676572442055, "rewards/margins": 0.034148890525102615, "rewards/rejected": -0.05625256896018982, "step": 410 }, { "epoch": 1.99, "grad_norm": 10.982648615730804, "learning_rate": 1.7246552070612087e-06, "log_odds_chosen": 1.7540910243988037, "log_odds_ratio": -0.25998422503471375, "logits/chosen": -2.738429546356201, "logits/rejected": -2.7384934425354004, "logps/chosen": -0.36052900552749634, "logps/rejected": -1.2116727828979492, "loss": 0.472, "nll_loss": 0.4113571047782898, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.018026452511548996, "rewards/margins": 0.042557187378406525, "rewards/rejected": -0.060583632439374924, "step": 420 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.4637714624404907, "eval_log_odds_ratio": -0.6059489846229553, "eval_logits/chosen": -2.7596328258514404, "eval_logits/rejected": -2.7660648822784424, "eval_logps/chosen": -0.8737161159515381, "eval_logps/rejected": -1.1384042501449585, "eval_loss": 0.9422717094421387, "eval_nll_loss": 0.9339601397514343, "eval_rewards/accuracies": 0.640625, "eval_rewards/chosen": -0.04368580877780914, "eval_rewards/margins": 0.013234402984380722, "eval_rewards/rejected": -0.056920215487480164, "eval_runtime": 57.3721, "eval_samples_per_second": 13.073, "eval_steps_per_second": 0.418, "step": 422 }, { "epoch": 2.04, "grad_norm": 9.26767033665704, "learning_rate": 1.585994293140307e-06, "log_odds_chosen": 2.6479477882385254, "log_odds_ratio": -0.14060619473457336, "logits/chosen": -2.7426111698150635, "logits/rejected": -2.7613494396209717, "logps/chosen": -0.2339097261428833, "logps/rejected": -1.2145100831985474, "loss": 0.2354, "nll_loss": 0.1956883668899536, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.011695485562086105, "rewards/margins": 0.04903002083301544, "rewards/rejected": -0.06072550266981125, "step": 430 }, { "epoch": 2.09, "grad_norm": 7.3805660163482125, "learning_rate": 1.4505078292730631e-06, "log_odds_chosen": 3.2394683361053467, "log_odds_ratio": -0.07881985604763031, "logits/chosen": -2.725740909576416, "logits/rejected": -2.7506771087646484, "logps/chosen": -0.13437342643737793, "logps/rejected": -1.3584119081497192, "loss": 0.1704, "nll_loss": 0.1273941844701767, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.006718672811985016, "rewards/margins": 0.06120193004608154, "rewards/rejected": -0.06792060285806656, "step": 440 }, { "epoch": 2.13, "grad_norm": 8.138860664723373, "learning_rate": 1.3186663759923782e-06, "log_odds_chosen": 3.6539645195007324, "log_odds_ratio": -0.046128250658512115, "logits/chosen": -2.75630521774292, "logits/rejected": -2.7848801612854004, "logps/chosen": -0.12164908647537231, "logps/rejected": -1.645410180091858, "loss": 0.1575, "nll_loss": 0.13616196811199188, "rewards/accuracies": 1.0, "rewards/chosen": -0.006082454230636358, "rewards/margins": 0.07618805766105652, "rewards/rejected": -0.08227051794528961, "step": 450 }, { "epoch": 2.18, "grad_norm": 8.550456232090282, "learning_rate": 1.1909278342777513e-06, "log_odds_chosen": 3.178800106048584, "log_odds_ratio": -0.07669878005981445, "logits/chosen": -2.731027841567993, "logits/rejected": -2.7491018772125244, "logps/chosen": -0.153743177652359, "logps/rejected": -1.2796289920806885, "loss": 0.1752, "nll_loss": 0.19261114299297333, "rewards/accuracies": 1.0, "rewards/chosen": -0.0076871588826179504, "rewards/margins": 0.05629429221153259, "rewards/rejected": -0.06398145854473114, "step": 460 }, { "epoch": 2.23, "grad_norm": 8.542489688724254, "learning_rate": 1.0677358552109617e-06, "log_odds_chosen": 3.4661343097686768, "log_odds_ratio": -0.0592988021671772, "logits/chosen": -2.7476649284362793, "logits/rejected": -2.74284291267395, "logps/chosen": -0.14152076840400696, "logps/rejected": -1.603075623512268, "loss": 0.1517, "nll_loss": 0.1476079523563385, "rewards/accuracies": 1.0, "rewards/chosen": -0.007076038978993893, "rewards/margins": 0.0730777308344841, "rewards/rejected": -0.08015377819538116, "step": 470 }, { "epoch": 2.27, "grad_norm": 7.078338821762873, "learning_rate": 9.495182991233237e-07, "log_odds_chosen": 3.5714924335479736, "log_odds_ratio": -0.05693133547902107, "logits/chosen": -2.7550268173217773, "logits/rejected": -2.76568341255188, "logps/chosen": -0.1448669731616974, "logps/rejected": -1.6731277704238892, "loss": 0.1571, "nll_loss": 0.1459461897611618, "rewards/accuracies": 1.0, "rewards/chosen": -0.007243348751217127, "rewards/margins": 0.07641303539276123, "rewards/rejected": -0.08365638554096222, "step": 480 }, { "epoch": 2.32, "grad_norm": 7.953863925813501, "learning_rate": 8.366857495860869e-07, "log_odds_chosen": 3.53666353225708, "log_odds_ratio": -0.06214044615626335, "logits/chosen": -2.714313507080078, "logits/rejected": -2.744480848312378, "logps/chosen": -0.10873039066791534, "logps/rejected": -1.4339990615844727, "loss": 0.1535, "nll_loss": 0.11224369704723358, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.005436519160866737, "rewards/margins": 0.06626342982053757, "rewards/rejected": -0.07169995456933975, "step": 490 }, { "epoch": 2.37, "grad_norm": 7.8747727873348765, "learning_rate": 7.296300874050766e-07, "log_odds_chosen": 3.4877212047576904, "log_odds_ratio": -0.04788507893681526, "logits/chosen": -2.7288925647735596, "logits/rejected": -2.755156993865967, "logps/chosen": -0.12493199110031128, "logps/rejected": -1.555891513824463, "loss": 0.1404, "nll_loss": 0.13063213229179382, "rewards/accuracies": 1.0, "rewards/chosen": -0.006246599368751049, "rewards/margins": 0.0715479776263237, "rewards/rejected": -0.07779457420110703, "step": 500 }, { "epoch": 2.42, "grad_norm": 8.163301368292721, "learning_rate": 6.28723129572247e-07, "log_odds_chosen": 3.2528719902038574, "log_odds_ratio": -0.06903894245624542, "logits/chosen": -2.723130941390991, "logits/rejected": -2.731053113937378, "logps/chosen": -0.1517888903617859, "logps/rejected": -1.4444904327392578, "loss": 0.1647, "nll_loss": 0.1824251264333725, "rewards/accuracies": 1.0, "rewards/chosen": -0.00758944358676672, "rewards/margins": 0.06463508307933807, "rewards/rejected": -0.07222452014684677, "step": 510 }, { "epoch": 2.46, "grad_norm": 8.191096860807814, "learning_rate": 5.343153379012445e-07, "log_odds_chosen": 3.613795042037964, "log_odds_ratio": -0.05437632277607918, "logits/chosen": -2.7185778617858887, "logits/rejected": -2.738831043243408, "logps/chosen": -0.1386043280363083, "logps/rejected": -1.6129226684570312, "loss": 0.1547, "nll_loss": 0.14940614998340607, "rewards/accuracies": 1.0, "rewards/chosen": -0.006930218078196049, "rewards/margins": 0.07371591031551361, "rewards/rejected": -0.08064613491296768, "step": 520 }, { "epoch": 2.51, "grad_norm": 7.911676913184044, "learning_rate": 4.467346018320198e-07, "log_odds_chosen": 3.289904832839966, "log_odds_ratio": -0.07905766367912292, "logits/chosen": -2.738126754760742, "logits/rejected": -2.765601634979248, "logps/chosen": -0.15802282094955444, "logps/rejected": -1.4789544343948364, "loss": 0.164, "nll_loss": 0.16227465867996216, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007901140488684177, "rewards/margins": 0.0660465732216835, "rewards/rejected": -0.0739477127790451, "step": 530 }, { "epoch": 2.56, "grad_norm": 8.501165443462748, "learning_rate": 3.662850996319825e-07, "log_odds_chosen": 3.346526622772217, "log_odds_ratio": -0.0591273196041584, "logits/chosen": -2.6961731910705566, "logits/rejected": -2.705780506134033, "logps/chosen": -0.1303931623697281, "logps/rejected": -1.3868381977081299, "loss": 0.1629, "nll_loss": 0.1521378755569458, "rewards/accuracies": 1.0, "rewards/chosen": -0.006519658025354147, "rewards/margins": 0.06282225996255875, "rewards/rejected": -0.06934191286563873, "step": 540 }, { "epoch": 2.61, "grad_norm": 9.554188547629353, "learning_rate": 2.932462419488544e-07, "log_odds_chosen": 3.749110460281372, "log_odds_ratio": -0.04820709303021431, "logits/chosen": -2.7394113540649414, "logits/rejected": -2.752619981765747, "logps/chosen": -0.1146271601319313, "logps/rejected": -1.5809662342071533, "loss": 0.133, "nll_loss": 0.13163451850414276, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.005731357727199793, "rewards/margins": 0.07331694662570953, "rewards/rejected": -0.07904830574989319, "step": 550 }, { "epoch": 2.65, "grad_norm": 8.43728434003558, "learning_rate": 2.278717013844059e-07, "log_odds_chosen": 3.417363405227661, "log_odds_ratio": -0.057405441999435425, "logits/chosen": -2.717993974685669, "logits/rejected": -2.7329213619232178, "logps/chosen": -0.12498118728399277, "logps/rejected": -1.3357540369033813, "loss": 0.1578, "nll_loss": 0.13818371295928955, "rewards/accuracies": 1.0, "rewards/chosen": -0.0062490589916706085, "rewards/margins": 0.0605386421084404, "rewards/rejected": -0.06678769737482071, "step": 560 }, { "epoch": 2.7, "grad_norm": 7.63488898142434, "learning_rate": 1.7038853145946804e-07, "log_odds_chosen": 3.4726767539978027, "log_odds_ratio": -0.05518152192234993, "logits/chosen": -2.723026752471924, "logits/rejected": -2.7421092987060547, "logps/chosen": -0.1284557282924652, "logps/rejected": -1.4232304096221924, "loss": 0.1488, "nll_loss": 0.1334700882434845, "rewards/accuracies": 1.0, "rewards/chosen": -0.006422785576432943, "rewards/margins": 0.06473874300718307, "rewards/rejected": -0.07116152346134186, "step": 570 }, { "epoch": 2.75, "grad_norm": 7.573438983056155, "learning_rate": 1.2099637803016983e-07, "log_odds_chosen": 3.2544848918914795, "log_odds_ratio": -0.07813612371683121, "logits/chosen": -2.7465689182281494, "logits/rejected": -2.760995388031006, "logps/chosen": -0.2028094083070755, "logps/rejected": -1.4702503681182861, "loss": 0.1724, "nll_loss": 0.21864549815654755, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01014047022908926, "rewards/margins": 0.06337204575538635, "rewards/rejected": -0.07351251691579819, "step": 580 }, { "epoch": 2.8, "grad_norm": 7.42006307957704, "learning_rate": 7.986678589423757e-08, "log_odds_chosen": 3.658898115158081, "log_odds_ratio": -0.043824274092912674, "logits/chosen": -2.735332489013672, "logits/rejected": -2.7602334022521973, "logps/chosen": -0.13853900134563446, "logps/rejected": -1.616575837135315, "loss": 0.1584, "nll_loss": 0.1556694209575653, "rewards/accuracies": 1.0, "rewards/chosen": -0.006926950998604298, "rewards/margins": 0.07390183210372925, "rewards/rejected": -0.08082877844572067, "step": 590 }, { "epoch": 2.84, "grad_norm": 7.043177228838798, "learning_rate": 4.7142602995598754e-08, "log_odds_chosen": 3.6085071563720703, "log_odds_ratio": -0.05147845670580864, "logits/chosen": -2.7238612174987793, "logits/rejected": -2.7488350868225098, "logps/chosen": -0.12226458638906479, "logps/rejected": -1.4737613201141357, "loss": 0.147, "nll_loss": 0.13915380835533142, "rewards/accuracies": 1.0, "rewards/chosen": -0.006113228853791952, "rewards/margins": 0.06757483631372452, "rewards/rejected": -0.0736880749464035, "step": 600 }, { "epoch": 2.89, "grad_norm": 8.244444250935933, "learning_rate": 2.2937484296556566e-08, "log_odds_chosen": 3.4587013721466064, "log_odds_ratio": -0.05050401762127876, "logits/chosen": -2.7173829078674316, "logits/rejected": -2.74656343460083, "logps/chosen": -0.11210664361715317, "logps/rejected": -1.383599042892456, "loss": 0.145, "nll_loss": 0.12569257616996765, "rewards/accuracies": 1.0, "rewards/chosen": -0.005605332553386688, "rewards/margins": 0.06357462704181671, "rewards/rejected": -0.0691799595952034, "step": 610 }, { "epoch": 2.94, "grad_norm": 8.912901656210648, "learning_rate": 7.335497040648898e-09, "log_odds_chosen": 3.397965908050537, "log_odds_ratio": -0.05181251838803291, "logits/chosen": -2.70883846282959, "logits/rejected": -2.7197322845458984, "logps/chosen": -0.1189185157418251, "logps/rejected": -1.3503382205963135, "loss": 0.1465, "nll_loss": 0.13937658071517944, "rewards/accuracies": 1.0, "rewards/chosen": -0.00594592560082674, "rewards/margins": 0.06157098338007927, "rewards/rejected": -0.06751690804958344, "step": 620 }, { "epoch": 2.99, "grad_norm": 8.441114994591647, "learning_rate": 3.908287771542396e-10, "log_odds_chosen": 3.680515766143799, "log_odds_ratio": -0.04670769348740578, "logits/chosen": -2.716243028640747, "logits/rejected": -2.7199196815490723, "logps/chosen": -0.14760281145572662, "logps/rejected": -1.711219072341919, "loss": 0.1537, "nll_loss": 0.14854022860527039, "rewards/accuracies": 1.0, "rewards/chosen": -0.007380140013992786, "rewards/margins": 0.07818081974983215, "rewards/rejected": -0.08556096255779266, "step": 630 }, { "epoch": 3.0, "eval_log_odds_chosen": 0.5296839475631714, "eval_log_odds_ratio": -0.6105475425720215, "eval_logits/chosen": -2.7270965576171875, "eval_logits/rejected": -2.7365875244140625, "eval_logps/chosen": -1.0283128023147583, "eval_logps/rejected": -1.333586573600769, "eval_loss": 1.1074548959732056, "eval_nll_loss": 1.0961836576461792, "eval_rewards/accuracies": 0.6354166865348816, "eval_rewards/chosen": -0.05141563341021538, "eval_rewards/margins": 0.015263698063790798, "eval_rewards/rejected": -0.06667933613061905, "eval_runtime": 57.2818, "eval_samples_per_second": 13.093, "eval_steps_per_second": 0.419, "step": 633 }, { "epoch": 3.0, "step": 633, "total_flos": 0.0, "train_loss": 0.5264878700506442, "train_runtime": 6083.3362, "train_samples_per_second": 3.329, "train_steps_per_second": 0.104 } ], "logging_steps": 10, "max_steps": 633, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }