{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9412650602409639, "eval_steps": 500, "global_step": 625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03765060240963856, "grad_norm": 0.29182711243629456, "learning_rate": 0.0002, "loss": 2.048, "step": 25 }, { "epoch": 0.07530120481927711, "grad_norm": 0.355770081281662, "learning_rate": 0.0002, "loss": 1.9254, "step": 50 }, { "epoch": 0.11295180722891567, "grad_norm": 0.24575483798980713, "learning_rate": 0.0002, "loss": 1.8281, "step": 75 }, { "epoch": 0.15060240963855423, "grad_norm": 0.370360791683197, "learning_rate": 0.0002, "loss": 1.8354, "step": 100 }, { "epoch": 0.18825301204819278, "grad_norm": 0.30005237460136414, "learning_rate": 0.0002, "loss": 1.8066, "step": 125 }, { "epoch": 0.22590361445783133, "grad_norm": 0.4004781246185303, "learning_rate": 0.0002, "loss": 2.0024, "step": 150 }, { "epoch": 0.2635542168674699, "grad_norm": 0.37836953997612, "learning_rate": 0.0002, "loss": 1.9717, "step": 175 }, { "epoch": 0.30120481927710846, "grad_norm": 0.38621386885643005, "learning_rate": 0.0002, "loss": 2.0938, "step": 200 }, { "epoch": 0.338855421686747, "grad_norm": 0.32652556896209717, "learning_rate": 0.0002, "loss": 1.9128, "step": 225 }, { "epoch": 0.37650602409638556, "grad_norm": 0.3783712387084961, "learning_rate": 0.0002, "loss": 1.7907, "step": 250 }, { "epoch": 0.4141566265060241, "grad_norm": 0.2575533390045166, "learning_rate": 0.0002, "loss": 1.8501, "step": 275 }, { "epoch": 0.45180722891566266, "grad_norm": 0.37206143140792847, "learning_rate": 0.0002, "loss": 1.7147, "step": 300 }, { "epoch": 0.4894578313253012, "grad_norm": 0.3018392324447632, "learning_rate": 0.0002, "loss": 1.602, "step": 325 }, { "epoch": 0.5271084337349398, "grad_norm": 0.45196229219436646, "learning_rate": 0.0002, "loss": 1.8455, "step": 350 }, { "epoch": 0.5647590361445783, "grad_norm": 0.250590443611145, "learning_rate": 0.0002, "loss": 1.9206, "step": 375 }, { "epoch": 0.6024096385542169, "grad_norm": 0.314155638217926, "learning_rate": 0.0002, "loss": 1.908, "step": 400 }, { "epoch": 0.6400602409638554, "grad_norm": 0.319771409034729, "learning_rate": 0.0002, "loss": 1.6045, "step": 425 }, { "epoch": 0.677710843373494, "grad_norm": 0.4088861346244812, "learning_rate": 0.0002, "loss": 1.7106, "step": 450 }, { "epoch": 0.7153614457831325, "grad_norm": 0.29917973279953003, "learning_rate": 0.0002, "loss": 1.7349, "step": 475 }, { "epoch": 0.7530120481927711, "grad_norm": 0.6233075857162476, "learning_rate": 0.0002, "loss": 1.9191, "step": 500 }, { "epoch": 0.7906626506024096, "grad_norm": 0.24983790516853333, "learning_rate": 0.0002, "loss": 1.6759, "step": 525 }, { "epoch": 0.8283132530120482, "grad_norm": 0.42871519923210144, "learning_rate": 0.0002, "loss": 1.7982, "step": 550 }, { "epoch": 0.8659638554216867, "grad_norm": 0.3003176748752594, "learning_rate": 0.0002, "loss": 1.7154, "step": 575 }, { "epoch": 0.9036144578313253, "grad_norm": 0.4300028681755066, "learning_rate": 0.0002, "loss": 1.6376, "step": 600 }, { "epoch": 0.9412650602409639, "grad_norm": 0.38282954692840576, "learning_rate": 0.0002, "loss": 1.545, "step": 625 } ], "logging_steps": 25, "max_steps": 664, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.109390150045696e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }