{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.11323641928079571, "eval_steps": 5, "global_step": 37, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00306044376434583, "eval_loss": 1.1051609516143799, "eval_runtime": 26.5057, "eval_samples_per_second": 5.206, "eval_steps_per_second": 2.603, "step": 1 }, { "epoch": 0.009181331293037491, "grad_norm": 0.49553486704826355, "learning_rate": 3.3333333333333335e-05, "loss": 1.0599, "step": 3 }, { "epoch": 0.015302218821729151, "eval_loss": 1.0945595502853394, "eval_runtime": 25.9873, "eval_samples_per_second": 5.31, "eval_steps_per_second": 2.655, "step": 5 }, { "epoch": 0.018362662586074982, "grad_norm": 0.5389537811279297, "learning_rate": 6.666666666666667e-05, "loss": 1.1064, "step": 6 }, { "epoch": 0.02754399387911247, "grad_norm": 0.4423370361328125, "learning_rate": 0.0001, "loss": 0.975, "step": 9 }, { "epoch": 0.030604437643458302, "eval_loss": 0.9807908535003662, "eval_runtime": 25.9978, "eval_samples_per_second": 5.308, "eval_steps_per_second": 2.654, "step": 10 }, { "epoch": 0.036725325172149964, "grad_norm": 0.6366612315177917, "learning_rate": 0.00013333333333333334, "loss": 0.9206, "step": 12 }, { "epoch": 0.045906656465187455, "grad_norm": 0.6869972944259644, "learning_rate": 0.0001666666666666667, "loss": 0.8698, "step": 15 }, { "epoch": 0.045906656465187455, "eval_loss": 0.8133208751678467, "eval_runtime": 25.9999, "eval_samples_per_second": 5.308, "eval_steps_per_second": 2.654, "step": 15 }, { "epoch": 0.05508798775822494, "grad_norm": 0.7443881034851074, "learning_rate": 0.0002, "loss": 0.7708, "step": 18 }, { "epoch": 0.061208875286916604, "eval_loss": 0.7175387144088745, "eval_runtime": 26.0285, "eval_samples_per_second": 5.302, "eval_steps_per_second": 2.651, "step": 20 }, { "epoch": 0.06426931905126243, "grad_norm": 0.667968213558197, "learning_rate": 0.0001879473751206489, "loss": 0.7045, "step": 21 }, { "epoch": 0.07345065034429993, "grad_norm": 0.618311882019043, "learning_rate": 0.00015469481581224272, "loss": 0.6921, "step": 24 }, { "epoch": 0.07651109410864575, "eval_loss": 0.6743873953819275, "eval_runtime": 26.0301, "eval_samples_per_second": 5.302, "eval_steps_per_second": 2.651, "step": 25 }, { "epoch": 0.08263198163733741, "grad_norm": 0.48745396733283997, "learning_rate": 0.00010825793454723325, "loss": 0.6098, "step": 27 }, { "epoch": 0.09181331293037491, "grad_norm": 0.530785083770752, "learning_rate": 5.983045753470308e-05, "loss": 0.611, "step": 30 }, { "epoch": 0.09181331293037491, "eval_loss": 0.6592618227005005, "eval_runtime": 26.2376, "eval_samples_per_second": 5.26, "eval_steps_per_second": 2.63, "step": 30 }, { "epoch": 0.1009946442234124, "grad_norm": 0.5798205733299255, "learning_rate": 2.1085949060360654e-05, "loss": 0.7136, "step": 33 }, { "epoch": 0.10711553175210406, "eval_loss": 0.6530157923698425, "eval_runtime": 26.1128, "eval_samples_per_second": 5.285, "eval_steps_per_second": 2.642, "step": 35 }, { "epoch": 0.11017597551644988, "grad_norm": 0.5891212821006775, "learning_rate": 1.3638696597277679e-06, "loss": 0.6765, "step": 36 } ], "logging_steps": 3, "max_steps": 37, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 18, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3779253648687104e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }