{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991836734693877, "eval_steps": 100, "global_step": 153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.8246347904205322, "logits/rejected": -2.7856249809265137, "logps/chosen": -238.8000030517578, "logps/rejected": -252.79095458984375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 3.125e-06, "logits/chosen": -2.786532402038574, "logits/rejected": -2.722121238708496, "logps/chosen": -234.37245178222656, "logps/rejected": -252.76654052734375, "loss": 0.6931, "rewards/accuracies": 0.3854166567325592, "rewards/chosen": 0.00019371736561879516, "rewards/margins": -0.0003117284504696727, "rewards/rejected": 0.0005054458160884678, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989490450759331e-06, "logits/chosen": -2.761892318725586, "logits/rejected": -2.7504990100860596, "logps/chosen": -254.9222869873047, "logps/rejected": -267.8934326171875, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.008798873052001, "rewards/margins": 0.0012144726933911443, "rewards/rejected": 0.007584400475025177, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.872270441827174e-06, "logits/chosen": -2.7136857509613037, "logits/rejected": -2.7073874473571777, "logps/chosen": -241.10507202148438, "logps/rejected": -277.07415771484375, "loss": 0.6916, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": 0.036142051219940186, "rewards/margins": 0.003209862159565091, "rewards/rejected": 0.032932184636592865, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.630851211353007e-06, "logits/chosen": -2.719532012939453, "logits/rejected": -2.6920456886291504, "logps/chosen": -238.81729125976562, "logps/rejected": -257.0670471191406, "loss": 0.6898, "rewards/accuracies": 0.640625, "rewards/chosen": 0.05626382678747177, "rewards/margins": 0.008078296668827534, "rewards/rejected": 0.048185527324676514, "step": 40 }, { "epoch": 0.33, "learning_rate": 4.277872161641682e-06, "logits/chosen": -2.707075357437134, "logits/rejected": -2.682654619216919, "logps/chosen": -214.5452117919922, "logps/rejected": -231.02877807617188, "loss": 0.6873, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.045605290681123734, "rewards/margins": 0.007303851656615734, "rewards/rejected": 0.038301438093185425, "step": 50 }, { "epoch": 0.39, "learning_rate": 3.831813362428005e-06, "logits/chosen": -2.7438042163848877, "logits/rejected": -2.724104166030884, "logps/chosen": -239.53213500976562, "logps/rejected": -274.93902587890625, "loss": 0.6861, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.010097012855112553, "rewards/margins": 0.017007894814014435, "rewards/rejected": -0.006910882890224457, "step": 60 }, { "epoch": 0.46, "learning_rate": 3.3160280345958614e-06, "logits/chosen": -2.7218570709228516, "logits/rejected": -2.7025914192199707, "logps/chosen": -247.56350708007812, "logps/rejected": -262.78961181640625, "loss": 0.6839, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.01939699612557888, "rewards/margins": 0.020038722082972527, "rewards/rejected": -0.0006417257827706635, "step": 70 }, { "epoch": 0.52, "learning_rate": 2.757519902117886e-06, "logits/chosen": -2.650240898132324, "logits/rejected": -2.6454017162323, "logps/chosen": -229.64468383789062, "logps/rejected": -238.07791137695312, "loss": 0.6811, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.007218700833618641, "rewards/margins": 0.023447707295417786, "rewards/rejected": -0.030666405335068703, "step": 80 }, { "epoch": 0.59, "learning_rate": 2.185529423440807e-06, "logits/chosen": -2.719679117202759, "logits/rejected": -2.6773438453674316, "logps/chosen": -231.2781524658203, "logps/rejected": -246.88418579101562, "loss": 0.6793, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.028139105066657066, "rewards/margins": 0.027326997369527817, "rewards/rejected": -0.05546610429883003, "step": 90 }, { "epoch": 0.65, "learning_rate": 1.6300029195778454e-06, "logits/chosen": -2.651785373687744, "logits/rejected": -2.6500556468963623, "logps/chosen": -232.26229858398438, "logps/rejected": -263.81927490234375, "loss": 0.6793, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.013562011532485485, "rewards/margins": 0.03466662019491196, "rewards/rejected": -0.048228632658720016, "step": 100 }, { "epoch": 0.65, "eval_logits/chosen": -2.7459018230438232, "eval_logits/rejected": -2.6667211055755615, "eval_logps/chosen": -288.20477294921875, "eval_logps/rejected": -266.5933532714844, "eval_loss": 0.6749266386032104, "eval_rewards/accuracies": 0.6079999804496765, "eval_rewards/chosen": -0.04165023937821388, "eval_rewards/margins": 0.04912487417459488, "eval_rewards/rejected": -0.09077510982751846, "eval_runtime": 384.036, "eval_samples_per_second": 5.208, "eval_steps_per_second": 0.651, "step": 100 }, { "epoch": 0.72, "learning_rate": 1.1200247470632394e-06, "logits/chosen": -2.6394271850585938, "logits/rejected": -2.623563766479492, "logps/chosen": -241.2955780029297, "logps/rejected": -255.7236785888672, "loss": 0.677, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.05771399661898613, "rewards/margins": 0.021994810551404953, "rewards/rejected": -0.07970880717039108, "step": 110 }, { "epoch": 0.78, "learning_rate": 6.822945986946386e-07, "logits/chosen": -2.7274205684661865, "logits/rejected": -2.681755542755127, "logps/chosen": -255.23196411132812, "logps/rejected": -279.19464111328125, "loss": 0.6754, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.05603557080030441, "rewards/margins": 0.03728459030389786, "rewards/rejected": -0.09332015365362167, "step": 120 }, { "epoch": 0.85, "learning_rate": 3.397296523427807e-07, "logits/chosen": -2.654305934906006, "logits/rejected": -2.646434783935547, "logps/chosen": -262.47210693359375, "logps/rejected": -293.465087890625, "loss": 0.67, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05943986773490906, "rewards/margins": 0.05442862585186958, "rewards/rejected": -0.11386849731206894, "step": 130 }, { "epoch": 0.91, "learning_rate": 1.102647517397798e-07, "logits/chosen": -2.711536407470703, "logits/rejected": -2.693706512451172, "logps/chosen": -233.05447387695312, "logps/rejected": -257.82183837890625, "loss": 0.6737, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06369104981422424, "rewards/margins": 0.037472162395715714, "rewards/rejected": -0.10116322338581085, "step": 140 }, { "epoch": 0.98, "learning_rate": 5.9134352763748345e-09, "logits/chosen": -2.659524440765381, "logits/rejected": -2.654564380645752, "logps/chosen": -237.49960327148438, "logps/rejected": -260.6737365722656, "loss": 0.6724, "rewards/accuracies": 0.59375, "rewards/chosen": -0.05076870322227478, "rewards/margins": 0.045312874019145966, "rewards/rejected": -0.09608156979084015, "step": 150 }, { "epoch": 1.0, "step": 153, "total_flos": 0.0, "train_loss": 0.6821690501730426, "train_runtime": 6321.1061, "train_samples_per_second": 3.101, "train_steps_per_second": 0.024 } ], "logging_steps": 10, "max_steps": 153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }