{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03837421252918039, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025582808352786926, "grad_norm": 0.41173064708709717, "learning_rate": 0.00018, "loss": 2.2691, "step": 10 }, { "epoch": 0.005116561670557385, "grad_norm": 0.35278061032295227, "learning_rate": 0.00019889478706014687, "loss": 2.2137, "step": 20 }, { "epoch": 0.007674842505836078, "grad_norm": 0.32279932498931885, "learning_rate": 0.00019510565162951537, "loss": 2.2253, "step": 30 }, { "epoch": 0.01023312334111477, "grad_norm": 0.32902857661247253, "learning_rate": 0.0001887222819443612, "loss": 2.2422, "step": 40 }, { "epoch": 0.012791404176393463, "grad_norm": 0.4015531539916992, "learning_rate": 0.0001799187996894925, "loss": 2.1889, "step": 50 }, { "epoch": 0.015349685011672156, "grad_norm": 0.3013351857662201, "learning_rate": 0.0001689353409118566, "loss": 2.2778, "step": 60 }, { "epoch": 0.017907965846950848, "grad_norm": 0.2108200341463089, "learning_rate": 0.0001560715057351673, "loss": 2.1985, "step": 70 }, { "epoch": 0.02046624668222954, "grad_norm": 0.44372543692588806, "learning_rate": 0.00014167818604952906, "loss": 1.9972, "step": 80 }, { "epoch": 0.023024527517508234, "grad_norm": 0.36838993430137634, "learning_rate": 0.00012614799409538198, "loss": 2.17, "step": 90 }, { "epoch": 0.025582808352786927, "grad_norm": 0.33015120029449463, "learning_rate": 0.0001099045530250463, "loss": 2.2036, "step": 100 }, { "epoch": 0.02814108918806562, "grad_norm": 0.22048693895339966, "learning_rate": 9.339094156743007e-05, "loss": 2.2489, "step": 110 }, { "epoch": 0.030699370023344313, "grad_norm": 0.23766057193279266, "learning_rate": 7.705760799532485e-05, "loss": 1.8966, "step": 120 }, { "epoch": 0.033257650858623006, "grad_norm": 0.24646614491939545, "learning_rate": 6.135008307075481e-05, "loss": 1.9245, "step": 130 }, { "epoch": 0.035815931693901695, "grad_norm": 0.393329918384552, "learning_rate": 4.669682712720065e-05, "loss": 2.2395, "step": 140 }, { "epoch": 0.03837421252918039, "grad_norm": 0.23788638412952423, "learning_rate": 3.349754278861517e-05, "loss": 2.0628, "step": 150 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0352397671453491e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }