{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-500", "epoch": 0.6002400960384153, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012004801920768306, "eval_loss": 11.5, "eval_runtime": 9.2958, "eval_samples_per_second": 150.929, "eval_steps_per_second": 37.759, "step": 1 }, { "epoch": 0.060024009603841535, "grad_norm": 0.0005615451955236495, "learning_rate": 0.000108, "loss": 92.0, "step": 50 }, { "epoch": 0.12004801920768307, "grad_norm": 0.002014897530898452, "learning_rate": 0.000216, "loss": 92.0, "step": 100 }, { "epoch": 0.18007202881152462, "grad_norm": 0.00805481057614088, "learning_rate": 0.00020777898951121896, "loss": 92.0, "step": 150 }, { "epoch": 0.24009603841536614, "grad_norm": 0.007082858122885227, "learning_rate": 0.00018436753236814713, "loss": 92.0, "step": 200 }, { "epoch": 0.30012004801920766, "grad_norm": 0.0037972754798829556, "learning_rate": 0.0001493298106954297, "loss": 92.0, "step": 250 }, { "epoch": 0.36014405762304924, "grad_norm": 0.0060953060165047646, "learning_rate": 0.000108, "loss": 92.0, "step": 300 }, { "epoch": 0.42016806722689076, "grad_norm": 0.008572332561016083, "learning_rate": 6.667018930457031e-05, "loss": 92.0, "step": 350 }, { "epoch": 0.4801920768307323, "grad_norm": 0.0030469526536762714, "learning_rate": 3.1632467631852876e-05, "loss": 92.0, "step": 400 }, { "epoch": 0.5402160864345739, "grad_norm": 0.0042664664797484875, "learning_rate": 8.221010488781032e-06, "loss": 92.0, "step": 450 }, { "epoch": 0.6002400960384153, "grad_norm": 0.0034124753437936306, "learning_rate": 0.0, "loss": 92.0, "step": 500 }, { "epoch": 0.6002400960384153, "eval_loss": 11.5, "eval_runtime": 9.3471, "eval_samples_per_second": 150.1, "eval_steps_per_second": 37.552, "step": 500 } ], "logging_steps": 50, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 80396156928000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }