{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9938900203665988, "eval_steps": 500, "global_step": 122, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016293279022403257, "grad_norm": 1.4111297130584717, "learning_rate": 3.846153846153846e-08, "logits/chosen": -3.2578125, "logits/rejected": -3.19140625, "logps/chosen": -46.375, "logps/rejected": -45.75, "loss": 0.6914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1629327902240326, "grad_norm": 0.7594385147094727, "learning_rate": 3.8461538461538463e-07, "logits/chosen": -3.2265625, "logits/rejected": -3.2200520038604736, "logps/chosen": -42.77604293823242, "logps/rejected": -41.88541793823242, "loss": 0.6924, "rewards/accuracies": 0.2222222238779068, "rewards/chosen": 0.0023810069542378187, "rewards/margins": -0.0003809928894042969, "rewards/rejected": 0.0027594566345214844, "step": 10 }, { "epoch": 0.3258655804480652, "grad_norm": 0.7135093808174133, "learning_rate": 4.949291683053768e-07, "logits/chosen": -3.2763671875, "logits/rejected": -3.2466797828674316, "logps/chosen": -40.720314025878906, "logps/rejected": -39.4296875, "loss": 0.6872, "rewards/accuracies": 0.421875, "rewards/chosen": 0.033158015459775925, "rewards/margins": 0.012033844366669655, "rewards/rejected": 0.02114267274737358, "step": 20 }, { "epoch": 0.48879837067209775, "grad_norm": 0.699894368648529, "learning_rate": 4.70586371748506e-07, "logits/chosen": -3.2841796875, "logits/rejected": -3.2529296875, "logps/chosen": -40.8515625, "logps/rejected": -40.571876525878906, "loss": 0.6765, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.0396418571472168, "rewards/margins": 0.034914396703243256, "rewards/rejected": 0.004716300871223211, "step": 30 }, { "epoch": 0.6517311608961304, "grad_norm": 1.1233805418014526, "learning_rate": 4.280458575653296e-07, 
"logits/chosen": -3.195117235183716, "logits/rejected": -3.162890672683716, "logps/chosen": -43.59375, "logps/rejected": -44.532814025878906, "loss": 0.6415, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.06923361122608185, "rewards/margins": 0.11445312201976776, "rewards/rejected": -0.1837112456560135, "step": 40 }, { "epoch": 0.814663951120163, "grad_norm": 1.2118208408355713, "learning_rate": 3.7081709127108767e-07, "logits/chosen": -3.0589842796325684, "logits/rejected": null, "logps/chosen": -50.13593673706055, "logps/rejected": -52.04999923706055, "loss": 0.6138, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.3750244081020355, "rewards/margins": 0.18903808295726776, "rewards/rejected": -0.5643554925918579, "step": 50 }, { "epoch": 0.9775967413441955, "grad_norm": 1.2914516925811768, "learning_rate": 3.0362127536287636e-07, "logits/chosen": -3.135546922683716, "logits/rejected": -3.056835889816284, "logps/chosen": -50.14531326293945, "logps/rejected": -56.23749923706055, "loss": 0.566, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.4345153868198395, "rewards/margins": 0.36333543062210083, "rewards/rejected": -0.7979736328125, "step": 60 }, { "epoch": 1.1466395112016294, "grad_norm": 1.4273715019226074, "learning_rate": 2.3200186419770823e-07, "logits/chosen": -3.161651134490967, "logits/rejected": -3.0788965225219727, "logps/chosen": -53.49691390991211, "logps/rejected": -62.85802459716797, "loss": 0.5371, "rewards/accuracies": 0.6095678806304932, "rewards/chosen": -0.5622889995574951, "rewards/margins": 0.5249747037887573, "rewards/rejected": -1.0871431827545166, "step": 70 }, { "epoch": 1.309572301425662, "grad_norm": 1.3244953155517578, "learning_rate": 1.6186724554503237e-07, "logits/chosen": -3.133984327316284, "logits/rejected": -3.0191407203674316, "logps/chosen": -55.234375, "logps/rejected": -68.234375, "loss": 0.4905, "rewards/accuracies": 0.660937488079071, "rewards/chosen": -0.6110439300537109, 
"rewards/margins": 0.7057861089706421, "rewards/rejected": -1.3170897960662842, "step": 80 }, { "epoch": 1.4725050916496945, "grad_norm": 1.8561619520187378, "learning_rate": 9.900331622138063e-08, "logits/chosen": -3.107617139816284, "logits/rejected": -2.9839844703674316, "logps/chosen": -55.421875, "logps/rejected": -68.80937194824219, "loss": 0.4936, "rewards/accuracies": 0.640625, "rewards/chosen": -0.6593307256698608, "rewards/margins": 0.737597644329071, "rewards/rejected": -1.3976562023162842, "step": 90 }, { "epoch": 1.635437881873727, "grad_norm": 0.9447304606437683, "learning_rate": 4.859616286322094e-08, "logits/chosen": -3.1148438453674316, "logits/rejected": -2.9876952171325684, "logps/chosen": -53.092185974121094, "logps/rejected": -68.9312515258789, "loss": 0.468, "rewards/accuracies": 0.6484375, "rewards/chosen": -0.5302764773368835, "rewards/margins": 0.887438952922821, "rewards/rejected": -1.417944312095642, "step": 100 }, { "epoch": 1.7983706720977597, "grad_norm": 1.593487024307251, "learning_rate": 1.4804225250339281e-08, "logits/chosen": -3.1207032203674316, "logits/rejected": null, "logps/chosen": -54.428123474121094, "logps/rejected": -68.76249694824219, "loss": 0.4758, "rewards/accuracies": 0.640625, "rewards/chosen": -0.6090973019599915, "rewards/margins": 0.817614734172821, "rewards/rejected": -1.4268066883087158, "step": 110 }, { "epoch": 1.9613034623217924, "grad_norm": 2.265634059906006, "learning_rate": 4.152374292708538e-10, "logits/chosen": -3.109179735183716, "logits/rejected": -2.985156297683716, "logps/chosen": -54.99687576293945, "logps/rejected": -71.015625, "loss": 0.4654, "rewards/accuracies": 0.667187511920929, "rewards/chosen": -0.6471847295761108, "rewards/margins": 0.863818347454071, "rewards/rejected": -1.510644555091858, "step": 120 }, { "epoch": 1.9938900203665988, "step": 122, "total_flos": 0.0, "train_loss": 0.5645458033827485, "train_runtime": 476.0531, "train_samples_per_second": 16.473, "train_steps_per_second": 
0.256 } ], "logging_steps": 10, "max_steps": 122, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }