{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 169, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029585798816568046, "grad_norm": 2.8798646926879883, "learning_rate": 0.0002, "loss": 0.9728, "step": 5 }, { "epoch": 0.05917159763313609, "grad_norm": 0.8563704490661621, "learning_rate": 0.0002, "loss": 0.4818, "step": 10 }, { "epoch": 0.08875739644970414, "grad_norm": 0.6149247288703918, "learning_rate": 0.0002, "loss": 0.409, "step": 15 }, { "epoch": 0.11834319526627218, "grad_norm": 0.6278108954429626, "learning_rate": 0.0002, "loss": 0.41, "step": 20 }, { "epoch": 0.14792899408284024, "grad_norm": 0.6502833962440491, "learning_rate": 0.0002, "loss": 0.3889, "step": 25 }, { "epoch": 0.17751479289940827, "grad_norm": 0.4965257942676544, "learning_rate": 0.0002, "loss": 0.3566, "step": 30 }, { "epoch": 0.20710059171597633, "grad_norm": 0.5162677764892578, "learning_rate": 0.0002, "loss": 0.3501, "step": 35 }, { "epoch": 0.23668639053254437, "grad_norm": 0.5184158682823181, "learning_rate": 0.0002, "loss": 0.3403, "step": 40 }, { "epoch": 0.26627218934911245, "grad_norm": 0.5881743431091309, "learning_rate": 0.0002, "loss": 0.3298, "step": 45 }, { "epoch": 0.2958579881656805, "grad_norm": 0.5391240119934082, "learning_rate": 0.0002, "loss": 0.3357, "step": 50 }, { "epoch": 0.3254437869822485, "grad_norm": 0.5666182637214661, "learning_rate": 0.0002, "loss": 0.3275, "step": 55 }, { "epoch": 0.35502958579881655, "grad_norm": 0.48206210136413574, "learning_rate": 0.0002, "loss": 0.3305, "step": 60 }, { "epoch": 0.38461538461538464, "grad_norm": 0.5350416302680969, "learning_rate": 0.0002, "loss": 0.3332, "step": 65 }, { "epoch": 0.41420118343195267, "grad_norm": 0.46314942836761475, "learning_rate": 0.0002, "loss": 0.322, "step": 70 }, { "epoch": 0.4437869822485207, "grad_norm": 0.5748676657676697, "learning_rate": 0.0002, "loss": 0.3155, "step": 75 }, { "epoch": 0.47337278106508873, "grad_norm": 0.5200490355491638, "learning_rate": 0.0002, "loss": 0.3176, "step": 80 }, { "epoch": 0.5029585798816568, "grad_norm": 0.9615280628204346, "learning_rate": 0.0002, "loss": 0.2974, "step": 85 }, { "epoch": 0.5325443786982249, "grad_norm": 0.4187004566192627, "learning_rate": 0.0002, "loss": 0.3195, "step": 90 }, { "epoch": 0.5621301775147929, "grad_norm": 6.856645584106445, "learning_rate": 0.0002, "loss": 0.3072, "step": 95 }, { "epoch": 0.591715976331361, "grad_norm": 0.4104371964931488, "learning_rate": 0.0002, "loss": 0.2993, "step": 100 }, { "epoch": 0.621301775147929, "grad_norm": 0.40844306349754333, "learning_rate": 0.0002, "loss": 0.3151, "step": 105 }, { "epoch": 0.650887573964497, "grad_norm": 0.3981234133243561, "learning_rate": 0.0002, "loss": 0.2849, "step": 110 }, { "epoch": 0.6804733727810651, "grad_norm": 0.4282432496547699, "learning_rate": 0.0002, "loss": 0.3085, "step": 115 }, { "epoch": 0.7100591715976331, "grad_norm": 0.43276387453079224, "learning_rate": 0.0002, "loss": 0.3114, "step": 120 }, { "epoch": 0.7396449704142012, "grad_norm": 0.6382061839103699, "learning_rate": 0.0002, "loss": 0.2861, "step": 125 }, { "epoch": 0.7692307692307693, "grad_norm": 0.37785038352012634, "learning_rate": 0.0002, "loss": 0.3055, "step": 130 }, { "epoch": 0.7988165680473372, "grad_norm": 0.3209759294986725, "learning_rate": 0.0002, "loss": 0.2811, "step": 135 }, { "epoch": 0.8284023668639053, "grad_norm": 0.35694730281829834, "learning_rate": 0.0002, "loss": 0.2746, "step": 140 }, { "epoch": 0.8579881656804734, "grad_norm": 1.352852463722229, "learning_rate": 0.0002, "loss": 0.3107, "step": 145 }, { "epoch": 0.8875739644970414, "grad_norm": 0.3421157896518707, "learning_rate": 0.0002, "loss": 0.2829, "step": 150 }, { "epoch": 0.9171597633136095, "grad_norm": 0.3395731449127197, "learning_rate": 0.0002, "loss": 0.288, "step": 155 }, { "epoch": 0.9467455621301775, "grad_norm": 0.33509358763694763, "learning_rate": 0.0002, "loss": 0.2894, "step": 160 }, { "epoch": 0.9763313609467456, "grad_norm": 0.3939138948917389, "learning_rate": 0.0002, "loss": 0.2941, "step": 165 }, { "epoch": 1.0, "step": 169, "total_flos": 1.3410656498535629e+17, "train_loss": 0.3432579195711034, "train_runtime": 1711.473, "train_samples_per_second": 1.58, "train_steps_per_second": 0.099 } ], "logging_steps": 5, "max_steps": 169, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3410656498535629e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }