{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9856861232536214, "global_step": 23000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 3.9153167052369926e-06, "loss": 2.8668, "step": 500 }, { "epoch": 0.04, "learning_rate": 3.829776292105939e-06, "loss": 2.6094, "step": 1000 }, { "epoch": 0.06, "learning_rate": 3.7442358789748862e-06, "loss": 2.5971, "step": 1500 }, { "epoch": 0.09, "learning_rate": 3.6585240421702235e-06, "loss": 2.5186, "step": 2000 }, { "epoch": 0.11, "learning_rate": 3.572812205365561e-06, "loss": 2.4816, "step": 2500 }, { "epoch": 0.13, "learning_rate": 3.487100368560898e-06, "loss": 2.4647, "step": 3000 }, { "epoch": 0.15, "learning_rate": 3.401388531756235e-06, "loss": 2.4669, "step": 3500 }, { "epoch": 0.17, "learning_rate": 3.3156766949515727e-06, "loss": 2.4315, "step": 4000 }, { "epoch": 0.19, "learning_rate": 3.22996485814691e-06, "loss": 2.4304, "step": 4500 }, { "epoch": 0.21, "learning_rate": 3.1442530213422473e-06, "loss": 2.3974, "step": 5000 }, { "epoch": 0.24, "learning_rate": 3.0585411845375846e-06, "loss": 2.3895, "step": 5500 }, { "epoch": 0.26, "learning_rate": 2.972829347732922e-06, "loss": 2.359, "step": 6000 }, { "epoch": 0.28, "learning_rate": 2.8871175109282592e-06, "loss": 2.4182, "step": 6500 }, { "epoch": 0.3, "learning_rate": 2.801405674123596e-06, "loss": 2.4022, "step": 7000 }, { "epoch": 0.32, "learning_rate": 2.7156938373189334e-06, "loss": 2.4052, "step": 7500 }, { "epoch": 0.34, "learning_rate": 2.6299820005142707e-06, "loss": 2.3957, "step": 8000 }, { "epoch": 0.36, "learning_rate": 2.544270163709608e-06, "loss": 2.3473, "step": 8500 }, { "epoch": 0.39, "learning_rate": 2.4585583269049453e-06, "loss": 2.3853, "step": 9000 }, { "epoch": 0.41, "learning_rate": 2.373017913773892e-06, "loss": 2.3702, "step": 9500 }, { "epoch": 0.43, "learning_rate": 2.287306076969229e-06, "loss": 2.3517, "step": 10000 }, { "epoch": 0.45, "learning_rate": 2.2015942401645663e-06, "loss": 2.3135, "step": 10500 }, { "epoch": 0.47, "learning_rate": 2.115882403359904e-06, "loss": 2.2946, "step": 11000 }, { "epoch": 0.49, "learning_rate": 2.0301705665552413e-06, "loss": 2.3325, "step": 11500 }, { "epoch": 0.51, "learning_rate": 1.9444587297505786e-06, "loss": 2.3025, "step": 12000 }, { "epoch": 0.54, "learning_rate": 1.8587468929459157e-06, "loss": 2.2971, "step": 12500 }, { "epoch": 0.56, "learning_rate": 1.773035056141253e-06, "loss": 2.3086, "step": 13000 }, { "epoch": 0.58, "learning_rate": 1.6873232193365903e-06, "loss": 2.2943, "step": 13500 }, { "epoch": 0.6, "learning_rate": 1.601782806205537e-06, "loss": 2.2983, "step": 14000 }, { "epoch": 0.62, "learning_rate": 1.5160709694008742e-06, "loss": 2.297, "step": 14500 }, { "epoch": 0.64, "learning_rate": 1.4303591325962115e-06, "loss": 2.3029, "step": 15000 }, { "epoch": 0.66, "learning_rate": 1.3446472957915486e-06, "loss": 2.3202, "step": 15500 }, { "epoch": 0.69, "learning_rate": 1.2589354589868861e-06, "loss": 2.3167, "step": 16000 }, { "epoch": 0.71, "learning_rate": 1.1732236221822234e-06, "loss": 2.3015, "step": 16500 }, { "epoch": 0.73, "learning_rate": 1.0875117853775605e-06, "loss": 2.2858, "step": 17000 }, { "epoch": 0.75, "learning_rate": 1.0017999485728978e-06, "loss": 2.298, "step": 17500 }, { "epoch": 0.77, "learning_rate": 9.160881117682352e-07, "loss": 2.2984, "step": 18000 }, { "epoch": 0.79, "learning_rate": 8.303762749635724e-07, "loss": 2.3221, "step": 18500 }, { "epoch": 0.81, "learning_rate": 7.44835861832519e-07, "loss": 2.2805, "step": 19000 }, { "epoch": 0.84, "learning_rate": 6.592954487014657e-07, "loss": 2.3499, "step": 19500 }, { "epoch": 0.86, "learning_rate": 5.73583611896803e-07, "loss": 2.2966, "step": 20000 }, { "epoch": 0.88, "learning_rate": 4.878717750921403e-07, "loss": 2.2836, "step": 20500 }, { "epoch": 0.9, "learning_rate": 4.023313619610868e-07, "loss": 2.2698, "step": 21000 }, { "epoch": 0.92, "learning_rate": 3.1661952515642407e-07, "loss": 2.308, "step": 21500 }, { "epoch": 0.94, "learning_rate": 2.310791120253707e-07, "loss": 2.2932, "step": 22000 }, { "epoch": 0.96, "learning_rate": 1.4536727522070796e-07, "loss": 2.2692, "step": 22500 }, { "epoch": 0.99, "learning_rate": 5.965543841604526e-08, "loss": 2.275, "step": 23000 } ], "max_steps": 23334, "num_train_epochs": 1, "total_flos": 2.099179778282496e+16, "trial_name": null, "trial_params": null }