{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 50, "global_step": 147, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06802721088435375, "grad_norm": 1.0879597663879395, "learning_rate": 0.00019577464788732396, "loss": 0.8681, "step": 10 }, { "epoch": 0.1360544217687075, "grad_norm": 0.4811602532863617, "learning_rate": 0.00018169014084507045, "loss": 0.3276, "step": 20 }, { "epoch": 0.20408163265306123, "grad_norm": 0.6136676073074341, "learning_rate": 0.0001676056338028169, "loss": 0.3036, "step": 30 }, { "epoch": 0.272108843537415, "grad_norm": 0.6780334711074829, "learning_rate": 0.00015352112676056339, "loss": 0.314, "step": 40 }, { "epoch": 0.3401360544217687, "grad_norm": 0.47117260098457336, "learning_rate": 0.00013943661971830987, "loss": 0.3115, "step": 50 }, { "epoch": 0.3401360544217687, "eval_loss": 0.0781567394733429, "eval_runtime": 264.3948, "eval_samples_per_second": 0.567, "eval_steps_per_second": 0.095, "step": 50 }, { "epoch": 0.40816326530612246, "grad_norm": 0.5836557745933533, "learning_rate": 0.00012535211267605635, "loss": 0.3005, "step": 60 }, { "epoch": 0.47619047619047616, "grad_norm": 0.7661027312278748, "learning_rate": 0.00011126760563380282, "loss": 0.2917, "step": 70 }, { "epoch": 0.54421768707483, "grad_norm": 0.49967068433761597, "learning_rate": 9.718309859154931e-05, "loss": 0.294, "step": 80 }, { "epoch": 0.6122448979591837, "grad_norm": 0.4809722304344177, "learning_rate": 8.309859154929578e-05, "loss": 0.285, "step": 90 }, { "epoch": 0.6802721088435374, "grad_norm": 0.6247081160545349, "learning_rate": 6.901408450704226e-05, "loss": 0.2824, "step": 100 }, { "epoch": 0.6802721088435374, "eval_loss": 0.07400024682283401, "eval_runtime": 261.0394, "eval_samples_per_second": 0.575, "eval_steps_per_second": 0.096, "step": 100 }, { "epoch": 0.7482993197278912, "grad_norm": 0.6426054239273071, "learning_rate": 5.492957746478874e-05, "loss": 0.2742, "step": 110 }, { "epoch": 0.8163265306122449, "grad_norm": 0.525353193283081, "learning_rate": 4.0845070422535214e-05, "loss": 0.2708, "step": 120 }, { "epoch": 0.8843537414965986, "grad_norm": 0.5583876967430115, "learning_rate": 2.676056338028169e-05, "loss": 0.2713, "step": 130 }, { "epoch": 0.9523809523809523, "grad_norm": 0.6190481185913086, "learning_rate": 1.267605633802817e-05, "loss": 0.2684, "step": 140 } ], "logging_steps": 10, "max_steps": 147, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7908016076781568e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }