{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.027311211252219036, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013655605626109517, "eval_loss": 11.5, "eval_runtime": 35.0696, "eval_samples_per_second": 87.939, "eval_steps_per_second": 21.985, "step": 1 }, { "epoch": 0.0013655605626109518, "grad_norm": 4.888439434580505e-06, "learning_rate": 4.12e-05, "loss": 23.0, "step": 10 }, { "epoch": 0.0027311211252219036, "grad_norm": 1.274573241971666e-05, "learning_rate": 8.24e-05, "loss": 23.0, "step": 20 }, { "epoch": 0.004096681687832855, "grad_norm": 1.1897797776327934e-05, "learning_rate": 0.0001236, "loss": 23.0, "step": 30 }, { "epoch": 0.005462242250443807, "grad_norm": 1.6764104657340795e-05, "learning_rate": 0.0001648, "loss": 23.0, "step": 40 }, { "epoch": 0.006827802813054759, "grad_norm": 3.6385521525517106e-05, "learning_rate": 0.000206, "loss": 23.0, "step": 50 }, { "epoch": 0.006827802813054759, "eval_loss": 11.5, "eval_runtime": 34.9847, "eval_samples_per_second": 88.153, "eval_steps_per_second": 22.038, "step": 50 }, { "epoch": 0.00819336337566571, "grad_norm": 2.2429987438954413e-05, "learning_rate": 0.0002057490971767619, "loss": 23.0, "step": 60 }, { "epoch": 0.009558923938276663, "grad_norm": 3.156892489641905e-05, "learning_rate": 0.00020499761108038175, "loss": 23.0, "step": 70 }, { "epoch": 0.010924484500887614, "grad_norm": 2.4492210286553018e-05, "learning_rate": 0.00020374920287558198, "loss": 23.0, "step": 80 }, { "epoch": 0.012290045063498567, "grad_norm": 1.4094251127971802e-05, "learning_rate": 0.00020200995468164684, "loss": 23.0, "step": 90 }, { "epoch": 0.013655605626109518, "grad_norm": 0.00013197172665968537, "learning_rate": 0.00019978833994094855, "loss": 23.0, "step": 100 }, { "epoch": 0.013655605626109518, "eval_loss": 11.5, "eval_runtime": 34.9937, "eval_samples_per_second": 88.13, "eval_steps_per_second": 22.033, "step": 100 }, { "epoch": 0.01502116618872047, "grad_norm": 6.907564966240898e-05, "learning_rate": 0.00019709518213718787, "loss": 23.0, "step": 110 }, { "epoch": 0.01638672675133142, "grad_norm": 2.6268724468536675e-05, "learning_rate": 0.00019394360206446948, "loss": 23.0, "step": 120 }, { "epoch": 0.017752287313942374, "grad_norm": 0.00011045205610571429, "learning_rate": 0.00019034895390411186, "loss": 23.0, "step": 130 }, { "epoch": 0.019117847876553327, "grad_norm": 0.00011072980123572052, "learning_rate": 0.0001863287504206196, "loss": 23.0, "step": 140 }, { "epoch": 0.020483408439164276, "grad_norm": 0.00032751320395618677, "learning_rate": 0.00018190257764125471, "loss": 23.0, "step": 150 }, { "epoch": 0.020483408439164276, "eval_loss": 11.5, "eval_runtime": 35.0218, "eval_samples_per_second": 88.06, "eval_steps_per_second": 22.015, "step": 150 }, { "epoch": 0.02184896900177523, "grad_norm": 0.00021085802291054279, "learning_rate": 0.00017709199943488106, "loss": 23.0, "step": 160 }, { "epoch": 0.02321452956438618, "grad_norm": 0.00017776116146706045, "learning_rate": 0.00017192045245496238, "loss": 23.0, "step": 170 }, { "epoch": 0.024580090126997134, "grad_norm": 7.381874456768855e-05, "learning_rate": 0.00016641313195854277, "loss": 23.0, "step": 180 }, { "epoch": 0.025945650689608083, "grad_norm": 0.0005210270173847675, "learning_rate": 0.0001605968690574869, "loss": 23.0, "step": 190 }, { "epoch": 0.027311211252219036, "grad_norm": 0.0003010142536368221, "learning_rate": 0.0001545, "loss": 23.0, "step": 200 }, { "epoch": 0.027311211252219036, "eval_loss": 11.5, "eval_runtime": 35.0475, "eval_samples_per_second": 87.995, "eval_steps_per_second": 21.999, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7101503766528.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }