{ "best_metric": 2.368725299835205, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.005196154845414394, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001732051615138131, "eval_loss": 8.650031089782715, "eval_runtime": 353.3612, "eval_samples_per_second": 6.88, "eval_steps_per_second": 3.441, "step": 1 }, { "epoch": 0.0005196154845414394, "grad_norm": 9.004079818725586, "learning_rate": 0.00012, "loss": 6.4558, "step": 3 }, { "epoch": 0.0008660258075690655, "eval_loss": 6.17396879196167, "eval_runtime": 356.3946, "eval_samples_per_second": 6.821, "eval_steps_per_second": 3.412, "step": 5 }, { "epoch": 0.0010392309690828788, "grad_norm": 18.178464889526367, "learning_rate": 0.0001992114701314478, "loss": 5.7051, "step": 6 }, { "epoch": 0.001558846453624318, "grad_norm": 26.80777931213379, "learning_rate": 0.00018763066800438636, "loss": 3.9092, "step": 9 }, { "epoch": 0.001732051615138131, "eval_loss": 2.6880009174346924, "eval_runtime": 356.8594, "eval_samples_per_second": 6.812, "eval_steps_per_second": 3.408, "step": 10 }, { "epoch": 0.0020784619381657575, "grad_norm": 4.757950305938721, "learning_rate": 0.000163742398974869, "loss": 2.4351, "step": 12 }, { "epoch": 0.002598077422707197, "grad_norm": 3.0368690490722656, "learning_rate": 0.00013090169943749476, "loss": 2.4886, "step": 15 }, { "epoch": 0.002598077422707197, "eval_loss": 2.514369249343872, "eval_runtime": 356.622, "eval_samples_per_second": 6.817, "eval_steps_per_second": 3.41, "step": 15 }, { "epoch": 0.003117692907248636, "grad_norm": 3.3173792362213135, "learning_rate": 9.372094804706867e-05, "loss": 2.5876, "step": 18 }, { "epoch": 0.003464103230276262, "eval_loss": 2.4166297912597656, "eval_runtime": 356.3483, "eval_samples_per_second": 6.822, "eval_steps_per_second": 3.412, "step": 20 }, { "epoch": 0.0036373083917900753, "grad_norm": 3.117248058319092, "learning_rate": 5.7422070843492734e-05, "loss": 2.2366, "step": 21 }, { "epoch": 0.004156923876331515, "grad_norm": 2.577366590499878, "learning_rate": 2.7103137257858868e-05, "loss": 2.4647, "step": 24 }, { "epoch": 0.004330129037845328, "eval_loss": 2.3770086765289307, "eval_runtime": 356.25, "eval_samples_per_second": 6.824, "eval_steps_per_second": 3.413, "step": 25 }, { "epoch": 0.004676539360872954, "grad_norm": 4.037468910217285, "learning_rate": 7.022351411174866e-06, "loss": 2.3905, "step": 27 }, { "epoch": 0.005196154845414394, "grad_norm": 2.5198121070861816, "learning_rate": 0.0, "loss": 2.1613, "step": 30 }, { "epoch": 0.005196154845414394, "eval_loss": 2.368725299835205, "eval_runtime": 356.3348, "eval_samples_per_second": 6.822, "eval_steps_per_second": 3.413, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.150341542313984e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }