{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 169, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029585798816568046, "grad_norm": 2.6524465084075928, "learning_rate": 0.0002, "loss": 0.9777, "step": 5 }, { "epoch": 0.05917159763313609, "grad_norm": 0.8162792325019836, "learning_rate": 0.0002, "loss": 0.4776, "step": 10 }, { "epoch": 0.08875739644970414, "grad_norm": 0.6695148348808289, "learning_rate": 0.0002, "loss": 0.4121, "step": 15 }, { "epoch": 0.11834319526627218, "grad_norm": 0.6613922715187073, "learning_rate": 0.0002, "loss": 0.3996, "step": 20 }, { "epoch": 0.14792899408284024, "grad_norm": 0.5690529346466064, "learning_rate": 0.0002, "loss": 0.3708, "step": 25 }, { "epoch": 0.17751479289940827, "grad_norm": 0.6801875233650208, "learning_rate": 0.0002, "loss": 0.3682, "step": 30 }, { "epoch": 0.20710059171597633, "grad_norm": 0.5573334097862244, "learning_rate": 0.0002, "loss": 0.3578, "step": 35 }, { "epoch": 0.23668639053254437, "grad_norm": 0.48602545261383057, "learning_rate": 0.0002, "loss": 0.3506, "step": 40 }, { "epoch": 0.26627218934911245, "grad_norm": 0.5038454532623291, "learning_rate": 0.0002, "loss": 0.3442, "step": 45 }, { "epoch": 0.2958579881656805, "grad_norm": 0.4245965778827667, "learning_rate": 0.0002, "loss": 0.3278, "step": 50 }, { "epoch": 0.3254437869822485, "grad_norm": 0.4995708465576172, "learning_rate": 0.0002, "loss": 0.3376, "step": 55 }, { "epoch": 0.35502958579881655, "grad_norm": 0.44329550862312317, "learning_rate": 0.0002, "loss": 0.3188, "step": 60 }, { "epoch": 0.38461538461538464, "grad_norm": 1.3675458431243896, "learning_rate": 0.0002, "loss": 0.3306, "step": 65 }, { "epoch": 0.41420118343195267, "grad_norm": 0.5217191576957703, "learning_rate": 0.0002, "loss": 0.3164, "step": 70 }, { "epoch": 0.4437869822485207, "grad_norm": 0.46393847465515137, "learning_rate": 0.0002, "loss": 0.3149, "step": 75 }, { "epoch": 0.47337278106508873, "grad_norm": 0.4221293330192566, "learning_rate": 0.0002, "loss": 0.3086, "step": 80 }, { "epoch": 0.5029585798816568, "grad_norm": 0.4734734892845154, "learning_rate": 0.0002, "loss": 0.3061, "step": 85 }, { "epoch": 0.5325443786982249, "grad_norm": 0.40803948044776917, "learning_rate": 0.0002, "loss": 0.2961, "step": 90 }, { "epoch": 0.5621301775147929, "grad_norm": 0.3886331617832184, "learning_rate": 0.0002, "loss": 0.306, "step": 95 }, { "epoch": 0.591715976331361, "grad_norm": 0.36911633610725403, "learning_rate": 0.0002, "loss": 0.2968, "step": 100 }, { "epoch": 0.621301775147929, "grad_norm": 0.42346394062042236, "learning_rate": 0.0002, "loss": 0.3021, "step": 105 }, { "epoch": 0.650887573964497, "grad_norm": 0.39378297328948975, "learning_rate": 0.0002, "loss": 0.2996, "step": 110 }, { "epoch": 0.6804733727810651, "grad_norm": 0.4639412462711334, "learning_rate": 0.0002, "loss": 0.3017, "step": 115 }, { "epoch": 0.7100591715976331, "grad_norm": 0.35044291615486145, "learning_rate": 0.0002, "loss": 0.2771, "step": 120 }, { "epoch": 0.7396449704142012, "grad_norm": 0.3248099386692047, "learning_rate": 0.0002, "loss": 0.3, "step": 125 }, { "epoch": 0.7692307692307693, "grad_norm": 0.36477237939834595, "learning_rate": 0.0002, "loss": 0.2783, "step": 130 }, { "epoch": 0.7988165680473372, "grad_norm": 0.3327581286430359, "learning_rate": 0.0002, "loss": 0.2737, "step": 135 }, { "epoch": 0.8284023668639053, "grad_norm": 0.3683614134788513, "learning_rate": 0.0002, "loss": 0.2932, "step": 140 }, { "epoch": 0.8579881656804734, "grad_norm": 0.39452311396598816, "learning_rate": 0.0002, "loss": 0.2822, "step": 145 }, { "epoch": 0.8875739644970414, "grad_norm": 0.3625282049179077, "learning_rate": 0.0002, "loss": 0.2833, "step": 150 }, { "epoch": 0.9171597633136095, "grad_norm": 0.3708689212799072, "learning_rate": 0.0002, "loss": 0.2744, "step": 155 }, { "epoch": 0.9467455621301775, "grad_norm": 0.35336822271347046, "learning_rate": 0.0002, "loss": 0.2804, "step": 160 }, { "epoch": 0.9763313609467456, "grad_norm": 0.3205427825450897, "learning_rate": 0.0002, "loss": 0.2875, "step": 165 }, { "epoch": 1.0, "step": 169, "total_flos": 1.3375939562333798e+17, "train_loss": 0.3392575450197479, "train_runtime": 1725.4456, "train_samples_per_second": 1.563, "train_steps_per_second": 0.098 } ], "logging_steps": 5, "max_steps": 169, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3375939562333798e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }