{ "best_metric": 0.8934311866760254, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.04434589800443459, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017738359201773836, "grad_norm": 1.6331450939178467, "learning_rate": 2e-05, "loss": 2.1491, "step": 1 }, { "epoch": 0.0017738359201773836, "eval_loss": 2.156221389770508, "eval_runtime": 34.1792, "eval_samples_per_second": 6.963, "eval_steps_per_second": 3.482, "step": 1 }, { "epoch": 0.003547671840354767, "grad_norm": 2.142427444458008, "learning_rate": 4e-05, "loss": 2.2744, "step": 2 }, { "epoch": 0.005321507760532151, "grad_norm": 1.335496425628662, "learning_rate": 6e-05, "loss": 1.8577, "step": 3 }, { "epoch": 0.007095343680709534, "grad_norm": 1.5356032848358154, "learning_rate": 8e-05, "loss": 1.9983, "step": 4 }, { "epoch": 0.008869179600886918, "grad_norm": 1.8492310047149658, "learning_rate": 0.0001, "loss": 2.2588, "step": 5 }, { "epoch": 0.008869179600886918, "eval_loss": 2.057093858718872, "eval_runtime": 32.7002, "eval_samples_per_second": 7.278, "eval_steps_per_second": 3.639, "step": 5 }, { "epoch": 0.010643015521064302, "grad_norm": 1.8534923791885376, "learning_rate": 0.00012, "loss": 2.068, "step": 6 }, { "epoch": 0.012416851441241685, "grad_norm": 2.1694960594177246, "learning_rate": 0.00014, "loss": 1.8945, "step": 7 }, { "epoch": 0.014190687361419069, "grad_norm": 1.7592840194702148, "learning_rate": 0.00016, "loss": 1.7782, "step": 8 }, { "epoch": 0.015964523281596452, "grad_norm": 2.2886457443237305, "learning_rate": 0.00018, "loss": 1.5152, "step": 9 }, { "epoch": 0.017738359201773836, "grad_norm": 1.9047895669937134, "learning_rate": 0.0002, "loss": 1.3502, "step": 10 }, { "epoch": 0.017738359201773836, "eval_loss": 1.2971845865249634, "eval_runtime": 33.0416, "eval_samples_per_second": 7.203, "eval_steps_per_second": 3.602, "step": 10 }, { "epoch": 0.01951219512195122, "grad_norm": 1.956074833869934, "learning_rate": 0.00019781476007338058, "loss": 1.3962, "step": 11 }, { "epoch": 0.021286031042128603, "grad_norm": 1.7686818838119507, "learning_rate": 0.0001913545457642601, "loss": 1.1724, "step": 12 }, { "epoch": 0.023059866962305987, "grad_norm": 1.6020398139953613, "learning_rate": 0.00018090169943749476, "loss": 1.0728, "step": 13 }, { "epoch": 0.02483370288248337, "grad_norm": 1.6922051906585693, "learning_rate": 0.00016691306063588583, "loss": 0.9902, "step": 14 }, { "epoch": 0.026607538802660754, "grad_norm": 2.132755756378174, "learning_rate": 0.00015000000000000001, "loss": 0.9552, "step": 15 }, { "epoch": 0.026607538802660754, "eval_loss": 1.0003092288970947, "eval_runtime": 34.7928, "eval_samples_per_second": 6.84, "eval_steps_per_second": 3.42, "step": 15 }, { "epoch": 0.028381374722838137, "grad_norm": 1.8932487964630127, "learning_rate": 0.00013090169943749476, "loss": 1.0874, "step": 16 }, { "epoch": 0.03015521064301552, "grad_norm": 2.181488037109375, "learning_rate": 0.00011045284632676536, "loss": 1.0457, "step": 17 }, { "epoch": 0.031929046563192905, "grad_norm": 2.006877899169922, "learning_rate": 8.954715367323468e-05, "loss": 1.0567, "step": 18 }, { "epoch": 0.03370288248337029, "grad_norm": 1.926302433013916, "learning_rate": 6.909830056250527e-05, "loss": 0.9321, "step": 19 }, { "epoch": 0.03547671840354767, "grad_norm": 1.6763228178024292, "learning_rate": 5.000000000000002e-05, "loss": 1.141, "step": 20 }, { "epoch": 0.03547671840354767, "eval_loss": 0.9107561111450195, "eval_runtime": 35.8064, "eval_samples_per_second": 6.647, "eval_steps_per_second": 3.323, "step": 20 }, { "epoch": 0.03725055432372506, "grad_norm": 1.6538779735565186, "learning_rate": 3.308693936411421e-05, "loss": 0.8503, "step": 21 }, { "epoch": 0.03902439024390244, "grad_norm": 1.8892109394073486, "learning_rate": 1.9098300562505266e-05, "loss": 0.9907, "step": 22 }, { "epoch": 0.040798226164079826, "grad_norm": 2.003721237182617, "learning_rate": 8.645454235739903e-06, "loss": 0.7393, "step": 23 }, { "epoch": 0.042572062084257206, "grad_norm": 1.7759606838226318, "learning_rate": 2.1852399266194314e-06, "loss": 1.0311, "step": 24 }, { "epoch": 0.04434589800443459, "grad_norm": 1.840668797492981, "learning_rate": 0.0, "loss": 0.7071, "step": 25 }, { "epoch": 0.04434589800443459, "eval_loss": 0.8934311866760254, "eval_runtime": 32.808, "eval_samples_per_second": 7.254, "eval_steps_per_second": 3.627, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 601363788595200.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }