{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.11668611435239207, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005834305717619603, "eval_loss": 11.5, "eval_runtime": 3.9094, "eval_samples_per_second": 184.681, "eval_steps_per_second": 46.298, "step": 1 }, { "epoch": 0.005834305717619603, "grad_norm": 4.12616936955601e-05, "learning_rate": 4.24e-05, "loss": 23.0, "step": 10 }, { "epoch": 0.011668611435239206, "grad_norm": 2.467960075591691e-05, "learning_rate": 8.48e-05, "loss": 23.0, "step": 20 }, { "epoch": 0.01750291715285881, "grad_norm": 3.497821671771817e-05, "learning_rate": 0.0001272, "loss": 23.0, "step": 30 }, { "epoch": 0.023337222870478413, "grad_norm": 5.835205956827849e-05, "learning_rate": 0.0001696, "loss": 23.0, "step": 40 }, { "epoch": 0.029171528588098017, "grad_norm": 7.785590423736721e-05, "learning_rate": 0.000212, "loss": 23.0, "step": 50 }, { "epoch": 0.029171528588098017, "eval_loss": 11.5, "eval_runtime": 3.9019, "eval_samples_per_second": 185.037, "eval_steps_per_second": 46.387, "step": 50 }, { "epoch": 0.03500583430571762, "grad_norm": 2.9545175493694842e-05, "learning_rate": 0.00021174178932754136, "loss": 23.0, "step": 60 }, { "epoch": 0.040840140023337225, "grad_norm": 7.258558616740629e-05, "learning_rate": 0.00021096841528660647, "loss": 23.0, "step": 70 }, { "epoch": 0.046674445740956826, "grad_norm": 6.44875253783539e-05, "learning_rate": 0.0002096836456777834, "loss": 23.0, "step": 80 }, { "epoch": 0.052508751458576426, "grad_norm": 8.886439900379628e-05, "learning_rate": 0.00020789373976946182, "loss": 23.0, "step": 90 }, { "epoch": 0.058343057176196034, "grad_norm": 0.00012685952242463827, "learning_rate": 0.0002056074178033063, "loss": 23.0, "step": 100 }, { "epoch": 0.058343057176196034, "eval_loss": 11.5, "eval_runtime": 4.0392, "eval_samples_per_second": 178.75, "eval_steps_per_second": 44.811, "step": 100 }, { "epoch": 0.06417736289381563, "grad_norm": 7.025956438155845e-05, "learning_rate": 0.00020283581851011567, "loss": 23.0, "step": 110 }, { "epoch": 0.07001166861143523, "grad_norm": 0.00010229845065623522, "learning_rate": 0.00019959244484304625, "loss": 23.0, "step": 120 }, { "epoch": 0.07584597432905485, "grad_norm": 0.0003530419198796153, "learning_rate": 0.00019589309819258114, "loss": 23.0, "step": 130 }, { "epoch": 0.08168028004667445, "grad_norm": 0.00015672302106395364, "learning_rate": 0.00019175580140374444, "loss": 23.0, "step": 140 }, { "epoch": 0.08751458576429405, "grad_norm": 0.00022062881907913834, "learning_rate": 0.00018720071097061167, "loss": 23.0, "step": 150 }, { "epoch": 0.08751458576429405, "eval_loss": 11.5, "eval_runtime": 3.966, "eval_samples_per_second": 182.046, "eval_steps_per_second": 45.638, "step": 150 }, { "epoch": 0.09334889148191365, "grad_norm": 0.00015543992049060762, "learning_rate": 0.00018225001883589702, "loss": 23.0, "step": 160 }, { "epoch": 0.09918319719953325, "grad_norm": 0.00040469339000992477, "learning_rate": 0.00017692784427403898, "loss": 23.0, "step": 170 }, { "epoch": 0.10501750291715285, "grad_norm": 0.0006950330571271479, "learning_rate": 0.00017126011638451976, "loss": 23.0, "step": 180 }, { "epoch": 0.11085180863477247, "grad_norm": 0.0004899466875940561, "learning_rate": 0.00016527444776789915, "loss": 23.0, "step": 190 }, { "epoch": 0.11668611435239207, "grad_norm": 0.0006722796242684126, "learning_rate": 0.00015900000000000002, "loss": 23.0, "step": 200 }, { "epoch": 0.11668611435239207, "eval_loss": 11.5, "eval_runtime": 3.9865, "eval_samples_per_second": 181.113, "eval_steps_per_second": 45.404, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3999582879744.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }