{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "grad_norm": 4.658291339874268, "learning_rate": 2.8703181864639013e-06, "loss": 0.64, "step": 500 }, { "epoch": 0.8, "grad_norm": 4.34930944442749, "learning_rate": 2.5036959095382875e-06, "loss": 0.6413, "step": 1000 }, { "epoch": 1.0, "eval_loss": 0.6420477032661438, "eval_runtime": 20.5136, "eval_samples_per_second": 97.497, "eval_steps_per_second": 12.187, "step": 1250 }, { "epoch": 1.2, "grad_norm": 5.983999252319336, "learning_rate": 1.963525491562421e-06, "loss": 0.4847, "step": 1500 }, { "epoch": 1.6, "grad_norm": 5.177482604980469, "learning_rate": 1.3432073050985201e-06, "loss": 0.3344, "step": 2000 }, { "epoch": 2.0, "grad_norm": 4.585903167724609, "learning_rate": 7.500000000000003e-07, "loss": 0.3318, "step": 2500 }, { "epoch": 2.0, "eval_loss": 0.7324458360671997, "eval_runtime": 20.5637, "eval_samples_per_second": 97.259, "eval_steps_per_second": 12.157, "step": 2500 }, { "epoch": 2.4, "grad_norm": 5.989898681640625, "learning_rate": 2.86474508437579e-07, "loss": 0.1551, "step": 3000 }, { "epoch": 2.8, "grad_norm": 5.329368591308594, "learning_rate": 3.277859889929147e-08, "loss": 0.1518, "step": 3500 }, { "epoch": 3.0, "eval_loss": 0.994577169418335, "eval_runtime": 20.6046, "eval_samples_per_second": 97.066, "eval_steps_per_second": 12.133, "step": 3750 }, { "epoch": 3.0, "step": 3750, "total_flos": 2.3924465493816115e+17, "train_loss": 0.37510518595377607, "train_runtime": 2372.5928, "train_samples_per_second": 12.644, "train_steps_per_second": 1.581 } ], "logging_steps": 500, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3924465493816115e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }