{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-500", "epoch": 0.019598428206057873, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.799214103028936e-06, "eval_loss": 11.5, "eval_runtime": 247.5967, "eval_samples_per_second": 173.54, "eval_steps_per_second": 86.77, "step": 1 }, { "epoch": 0.0004899607051514469, "grad_norm": 0.0002126572362612933, "learning_rate": 0.0002, "loss": 46.0, "step": 50 }, { "epoch": 0.0009799214103028938, "grad_norm": 0.00118893978651613, "learning_rate": 0.00019994965423831854, "loss": 46.0, "step": 100 }, { "epoch": 0.0014698821154543406, "grad_norm": 0.000772263330873102, "learning_rate": 0.00019979866764718843, "loss": 46.0, "step": 150 }, { "epoch": 0.0019598428206057876, "grad_norm": 0.001554933493025601, "learning_rate": 0.00019954719225730847, "loss": 46.0, "step": 200 }, { "epoch": 0.002449803525757234, "grad_norm": 0.0017557292012497783, "learning_rate": 0.00019919548128307954, "loss": 46.0, "step": 250 }, { "epoch": 0.002939764230908681, "grad_norm": 0.0027389563620090485, "learning_rate": 0.00019874388886763944, "loss": 46.0, "step": 300 }, { "epoch": 0.003429724936060128, "grad_norm": 0.00450202776119113, "learning_rate": 0.00019819286972627066, "loss": 46.0, "step": 350 }, { "epoch": 0.003919685641211575, "grad_norm": 0.004904056899249554, "learning_rate": 0.00019754297868854073, "loss": 46.0, "step": 400 }, { "epoch": 0.004409646346363021, "grad_norm": 0.004392196424305439, "learning_rate": 0.00019679487013963564, "loss": 46.0, "step": 450 }, { "epoch": 0.004899607051514468, "grad_norm": 0.0027909581549465656, "learning_rate": 0.00019594929736144976, "loss": 46.0, "step": 500 }, { "epoch": 0.004899607051514468, "eval_loss": 11.5, "eval_runtime": 246.6985, "eval_samples_per_second": 174.172, "eval_steps_per_second": 87.086, "step": 500 }, { "epoch": 0.005389567756665915, "grad_norm": 0.006504695396870375, "learning_rate": 0.00019500711177409454, "loss": 46.0, "step": 550 }, { "epoch": 0.005879528461817362, "grad_norm": 0.0024417981039732695, "learning_rate": 0.00019396926207859084, "loss": 46.0, "step": 600 }, { "epoch": 0.006369489166968809, "grad_norm": 0.005731500219553709, "learning_rate": 0.00019283679330160726, "loss": 46.0, "step": 650 }, { "epoch": 0.006859449872120256, "grad_norm": 0.004803692921996117, "learning_rate": 0.00019161084574320696, "loss": 46.0, "step": 700 }, { "epoch": 0.0073494105772717025, "grad_norm": 0.005611758213490248, "learning_rate": 0.00019029265382866214, "loss": 46.0, "step": 750 }, { "epoch": 0.00783937128242315, "grad_norm": 0.00291110179387033, "learning_rate": 0.00018888354486549237, "loss": 46.0, "step": 800 }, { "epoch": 0.008329331987574597, "grad_norm": 0.0042321644723415375, "learning_rate": 0.00018738493770697852, "loss": 46.0, "step": 850 }, { "epoch": 0.008819292692726043, "grad_norm": 0.006696199532598257, "learning_rate": 0.00018579834132349772, "loss": 46.0, "step": 900 }, { "epoch": 0.00930925339787749, "grad_norm": 0.005761832930147648, "learning_rate": 0.00018412535328311814, "loss": 46.0, "step": 950 }, { "epoch": 0.009799214103028937, "grad_norm": 0.0030888088513165712, "learning_rate": 0.0001823676581429833, "loss": 46.0, "step": 1000 }, { "epoch": 0.009799214103028937, "eval_loss": 11.5, "eval_runtime": 243.388, "eval_samples_per_second": 176.541, "eval_steps_per_second": 88.271, "step": 1000 }, { "epoch": 0.010289174808180384, "grad_norm": 0.0029801796190440655, "learning_rate": 0.00018052702575310588, "loss": 46.0, "step": 1050 }, { "epoch": 0.01077913551333183, "grad_norm": 0.002504534786567092, "learning_rate": 0.00017860530947427875, "loss": 46.0, "step": 1100 }, { "epoch": 0.011269096218483278, "grad_norm": 0.022988324984908104, "learning_rate": 0.0001766044443118978, "loss": 46.0, "step": 1150 }, { "epoch": 0.011759056923634725, "grad_norm": 0.0028649207670241594, "learning_rate": 0.0001745264449675755, "loss": 46.0, "step": 1200 }, { "epoch": 0.012249017628786172, "grad_norm": 0.003464834066107869, "learning_rate": 0.00017237340381050703, "loss": 46.0, "step": 1250 }, { "epoch": 0.012738978333937619, "grad_norm": 0.005679805763065815, "learning_rate": 0.00017014748877063214, "loss": 46.0, "step": 1300 }, { "epoch": 0.013228939039089066, "grad_norm": 0.006639096420258284, "learning_rate": 0.00016785094115571322, "loss": 46.0, "step": 1350 }, { "epoch": 0.013718899744240513, "grad_norm": 0.03035644441843033, "learning_rate": 0.00016548607339452853, "loss": 46.0, "step": 1400 }, { "epoch": 0.014208860449391958, "grad_norm": 0.007504388224333525, "learning_rate": 0.00016305526670845226, "loss": 46.0, "step": 1450 }, { "epoch": 0.014698821154543405, "grad_norm": 0.0009108585654757917, "learning_rate": 0.00016056096871376667, "loss": 46.0, "step": 1500 }, { "epoch": 0.014698821154543405, "eval_loss": 11.5, "eval_runtime": 242.924, "eval_samples_per_second": 176.878, "eval_steps_per_second": 88.439, "step": 1500 }, { "epoch": 0.015188781859694852, "grad_norm": 0.0018345484277233481, "learning_rate": 0.00015800569095711982, "loss": 46.0, "step": 1550 }, { "epoch": 0.0156787425648463, "grad_norm": 0.00938267633318901, "learning_rate": 0.00015539200638661104, "loss": 46.0, "step": 1600 }, { "epoch": 0.016168703269997746, "grad_norm": 0.013782349415123463, "learning_rate": 0.00015272254676105025, "loss": 46.0, "step": 1650 }, { "epoch": 0.016658663975149195, "grad_norm": 0.005259850528091192, "learning_rate": 0.00015000000000000001, "loss": 46.0, "step": 1700 }, { "epoch": 0.01714862468030064, "grad_norm": 0.007433369290083647, "learning_rate": 0.0001472271074772683, "loss": 46.0, "step": 1750 }, { "epoch": 0.017638585385452085, "grad_norm": 0.003847072832286358, "learning_rate": 0.00014440666126057744, "loss": 46.0, "step": 1800 }, { "epoch": 0.018128546090603534, "grad_norm": 0.0026508287992328405, "learning_rate": 0.00014154150130018866, "loss": 46.0, "step": 1850 }, { "epoch": 0.01861850679575498, "grad_norm": 0.005416387226432562, "learning_rate": 0.00013863451256931287, "loss": 46.0, "step": 1900 }, { "epoch": 0.019108467500906428, "grad_norm": 0.0011210152879357338, "learning_rate": 0.00013568862215918717, "loss": 46.0, "step": 1950 }, { "epoch": 0.019598428206057873, "grad_norm": 0.004934302996844053, "learning_rate": 0.00013270679633174218, "loss": 46.0, "step": 2000 }, { "epoch": 0.019598428206057873, "eval_loss": 11.5, "eval_runtime": 239.7758, "eval_samples_per_second": 179.201, "eval_steps_per_second": 89.6, "step": 2000 } ], "logging_steps": 50, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 40091744157696.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }