|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9856861232536214, |
|
"global_step": 23000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.9153167052369926e-06, |
|
"loss": 2.8668, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.829776292105939e-06, |
|
"loss": 2.6094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.7442358789748862e-06, |
|
"loss": 2.5971, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.6585240421702235e-06, |
|
"loss": 2.5186, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.572812205365561e-06, |
|
"loss": 2.4816, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.487100368560898e-06, |
|
"loss": 2.4647, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.401388531756235e-06, |
|
"loss": 2.4669, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.3156766949515727e-06, |
|
"loss": 2.4315, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.22996485814691e-06, |
|
"loss": 2.4304, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.1442530213422473e-06, |
|
"loss": 2.3974, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.0585411845375846e-06, |
|
"loss": 2.3895, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.972829347732922e-06, |
|
"loss": 2.359, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.8871175109282592e-06, |
|
"loss": 2.4182, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.801405674123596e-06, |
|
"loss": 2.4022, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.7156938373189334e-06, |
|
"loss": 2.4052, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.6299820005142707e-06, |
|
"loss": 2.3957, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.544270163709608e-06, |
|
"loss": 2.3473, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.4585583269049453e-06, |
|
"loss": 2.3853, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.373017913773892e-06, |
|
"loss": 2.3702, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.287306076969229e-06, |
|
"loss": 2.3517, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2015942401645663e-06, |
|
"loss": 2.3135, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.115882403359904e-06, |
|
"loss": 2.2946, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.0301705665552413e-06, |
|
"loss": 2.3325, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9444587297505786e-06, |
|
"loss": 2.3025, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8587468929459157e-06, |
|
"loss": 2.2971, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.773035056141253e-06, |
|
"loss": 2.3086, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6873232193365903e-06, |
|
"loss": 2.2943, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.601782806205537e-06, |
|
"loss": 2.2983, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.5160709694008742e-06, |
|
"loss": 2.297, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.4303591325962115e-06, |
|
"loss": 2.3029, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3446472957915486e-06, |
|
"loss": 2.3202, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2589354589868861e-06, |
|
"loss": 2.3167, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1732236221822234e-06, |
|
"loss": 2.3015, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0875117853775605e-06, |
|
"loss": 2.2858, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.0017999485728978e-06, |
|
"loss": 2.298, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.160881117682352e-07, |
|
"loss": 2.2984, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.303762749635724e-07, |
|
"loss": 2.3221, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.44835861832519e-07, |
|
"loss": 2.2805, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.592954487014657e-07, |
|
"loss": 2.3499, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.73583611896803e-07, |
|
"loss": 2.2966, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.878717750921403e-07, |
|
"loss": 2.2836, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.023313619610868e-07, |
|
"loss": 2.2698, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.1661952515642407e-07, |
|
"loss": 2.308, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.310791120253707e-07, |
|
"loss": 2.2932, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.4536727522070796e-07, |
|
"loss": 2.2692, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.965543841604526e-08, |
|
"loss": 2.275, |
|
"step": 23000 |
|
} |
|
], |
|
"max_steps": 23334, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.099179778282496e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|