{ "best_metric": null, "best_model_checkpoint": null, "episode": 128, "epoch": 1.28, "eval_steps": 100, "global_step": 2, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "episode": 64, "epoch": 0.64, "eps": 0, "loss/policy_avg": -0.015076532028615475, "loss/value_avg": 2.0768136978149414, "lr": 3e-06, "objective/entropy": 13.955379486083984, "objective/kl": 0.8348603248596191, "objective/non_score_reward": -0.041743017733097076, "objective/rlhf_reward": -1.4829208850860596, "objective/scores": -1.4411778450012207, "policy/approxkl_avg": 0.003379022004082799, "policy/clipfrac_avg": 0.02070312574505806, "policy/entropy_avg": 0.9434927701950073, "step": 1, "val/clipfrac_avg": 0.6554687023162842, "val/num_eos_tokens": 0, "val/ratio": 0.9607726335525513, "val/ratio_var": 0.0007270940695889294 }, { "episode": 128, "epoch": 1.28, "eps": 0, "loss/policy_avg": -0.010912335477769375, "loss/value_avg": 0.6733213663101196, "lr": 1.5e-06, "objective/entropy": 10.704127311706543, "objective/kl": 0.7690310478210449, "objective/non_score_reward": -0.038451552391052246, "objective/rlhf_reward": -1.1398342847824097, "objective/scores": -1.1013827323913574, "policy/approxkl_avg": 0.0029775083530694246, "policy/clipfrac_avg": 0.01894531212747097, "policy/entropy_avg": 0.7254143357276917, "step": 2, "val/clipfrac_avg": 0.6109374761581421, "val/num_eos_tokens": 0, "val/ratio": 0.9696837663650513, "val/ratio_var": 0.0008283399511128664 } ], "logging_steps": 500, "max_steps": 2, "num_input_tokens_seen": 0, "num_train_epochs": 1.0, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0, "train_batch_size": null, "trial_name": null, "trial_params": null }