{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05116561670557385, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025582808352786926, "grad_norm": 0.41173064708709717, "learning_rate": 0.00018, "loss": 2.2691, "step": 10 }, { "epoch": 0.005116561670557385, "grad_norm": 0.35278061032295227, "learning_rate": 0.00019889478706014687, "loss": 2.2137, "step": 20 }, { "epoch": 0.007674842505836078, "grad_norm": 0.32279932498931885, "learning_rate": 0.00019510565162951537, "loss": 2.2253, "step": 30 }, { "epoch": 0.01023312334111477, "grad_norm": 0.32902857661247253, "learning_rate": 0.0001887222819443612, "loss": 2.2422, "step": 40 }, { "epoch": 0.012791404176393463, "grad_norm": 0.4015531539916992, "learning_rate": 0.0001799187996894925, "loss": 2.1889, "step": 50 }, { "epoch": 0.015349685011672156, "grad_norm": 0.3013351857662201, "learning_rate": 0.0001689353409118566, "loss": 2.2778, "step": 60 }, { "epoch": 0.017907965846950848, "grad_norm": 0.2108200341463089, "learning_rate": 0.0001560715057351673, "loss": 2.1985, "step": 70 }, { "epoch": 0.02046624668222954, "grad_norm": 0.44372543692588806, "learning_rate": 0.00014167818604952906, "loss": 1.9972, "step": 80 }, { "epoch": 0.023024527517508234, "grad_norm": 0.36838993430137634, "learning_rate": 0.00012614799409538198, "loss": 2.17, "step": 90 }, { "epoch": 0.025582808352786927, "grad_norm": 0.33015120029449463, "learning_rate": 0.0001099045530250463, "loss": 2.2036, "step": 100 }, { "epoch": 0.02814108918806562, "grad_norm": 0.22048693895339966, "learning_rate": 9.339094156743007e-05, "loss": 2.2489, "step": 110 }, { "epoch": 0.030699370023344313, "grad_norm": 0.23766057193279266, "learning_rate": 7.705760799532485e-05, "loss": 1.8966, "step": 120 }, { "epoch": 0.033257650858623006, "grad_norm": 0.24646614491939545, "learning_rate": 6.135008307075481e-05, "loss": 1.9245, "step": 130 }, { "epoch": 0.035815931693901695, "grad_norm": 0.393329918384552, "learning_rate": 4.669682712720065e-05, "loss": 2.2395, "step": 140 }, { "epoch": 0.03837421252918039, "grad_norm": 0.23788638412952423, "learning_rate": 3.349754278861517e-05, "loss": 2.0628, "step": 150 }, { "epoch": 0.04093249336445908, "grad_norm": 0.5308476686477661, "learning_rate": 2.2112272123788768e-05, "loss": 2.2382, "step": 160 }, { "epoch": 0.04349077419973778, "grad_norm": 0.30160173773765564, "learning_rate": 1.2851575637272262e-05, "loss": 2.0997, "step": 170 }, { "epoch": 0.04604905503501647, "grad_norm": 0.418215274810791, "learning_rate": 5.968060988383883e-06, "loss": 2.0202, "step": 180 }, { "epoch": 0.048607335870295164, "grad_norm": 0.37242060899734497, "learning_rate": 1.6494925127617634e-06, "loss": 2.2179, "step": 190 }, { "epoch": 0.05116561670557385, "grad_norm": 0.3666183352470398, "learning_rate": 1.3669500753099585e-08, "loss": 2.0904, "step": 200 }, { "epoch": 0.05116561670557385, "step": 200, "total_flos": 1.3791695468878234e+17, "train_loss": 2.1512529373168947, "train_runtime": 30852.7311, "train_samples_per_second": 0.052, "train_steps_per_second": 0.006 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3791695468878234e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }