{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15687043482523652, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015687043482523653, "eval_loss": 1.8242242336273193, "eval_runtime": 142.2157, "eval_samples_per_second": 15.104, "eval_steps_per_second": 7.552, "step": 1 }, { "epoch": 0.004706113044757096, "grad_norm": 0.7702743411064148, "learning_rate": 3e-05, "loss": 1.8967, "step": 3 }, { "epoch": 0.009412226089514192, "grad_norm": 0.7187179327011108, "learning_rate": 6e-05, "loss": 1.7129, "step": 6 }, { "epoch": 0.014118339134271287, "grad_norm": 0.5630563497543335, "learning_rate": 9e-05, "loss": 1.6705, "step": 9 }, { "epoch": 0.014118339134271287, "eval_loss": 1.6580013036727905, "eval_runtime": 137.3695, "eval_samples_per_second": 15.637, "eval_steps_per_second": 7.818, "step": 9 }, { "epoch": 0.018824452179028384, "grad_norm": 0.6352736353874207, "learning_rate": 0.00012, "loss": 1.6757, "step": 12 }, { "epoch": 0.02353056522378548, "grad_norm": 0.5756219029426575, "learning_rate": 0.00015000000000000001, "loss": 1.6138, "step": 15 }, { "epoch": 0.028236678268542575, "grad_norm": 0.5083643794059753, "learning_rate": 0.00018, "loss": 1.6546, "step": 18 }, { "epoch": 0.028236678268542575, "eval_loss": 1.5876401662826538, "eval_runtime": 145.4617, "eval_samples_per_second": 14.767, "eval_steps_per_second": 7.383, "step": 18 }, { "epoch": 0.03294279131329967, "grad_norm": 0.4145301878452301, "learning_rate": 0.0001999229036240723, "loss": 1.5349, "step": 21 }, { "epoch": 0.03764890435805677, "grad_norm": 0.42459216713905334, "learning_rate": 0.00019876883405951377, "loss": 1.5664, "step": 24 }, { "epoch": 0.04235501740281386, "grad_norm": 0.4329008460044861, "learning_rate": 0.00019624552364536473, "loss": 1.5261, "step": 27 }, { "epoch": 0.04235501740281386, "eval_loss": 1.5502554178237915, "eval_runtime": 148.1658, "eval_samples_per_second": 14.497, "eval_steps_per_second": 7.249, "step": 27 }, { "epoch": 0.04706113044757096, "grad_norm": 0.4267719089984894, "learning_rate": 0.0001923879532511287, "loss": 1.6148, "step": 30 }, { "epoch": 0.05176724349232806, "grad_norm": 0.4181731641292572, "learning_rate": 0.00018724960070727972, "loss": 1.5572, "step": 33 }, { "epoch": 0.05647335653708515, "grad_norm": 0.4355478584766388, "learning_rate": 0.00018090169943749476, "loss": 1.4369, "step": 36 }, { "epoch": 0.05647335653708515, "eval_loss": 1.531390905380249, "eval_runtime": 137.4641, "eval_samples_per_second": 15.626, "eval_steps_per_second": 7.813, "step": 36 }, { "epoch": 0.06117946958184225, "grad_norm": 0.4404115080833435, "learning_rate": 0.00017343225094356855, "loss": 1.515, "step": 39 }, { "epoch": 0.06588558262659934, "grad_norm": 0.39275091886520386, "learning_rate": 0.00016494480483301836, "loss": 1.5592, "step": 42 }, { "epoch": 0.07059169567135644, "grad_norm": 0.37436187267303467, "learning_rate": 0.00015555702330196023, "loss": 1.5643, "step": 45 }, { "epoch": 0.07059169567135644, "eval_loss": 1.515994906425476, "eval_runtime": 137.4794, "eval_samples_per_second": 15.624, "eval_steps_per_second": 7.812, "step": 45 }, { "epoch": 0.07529780871611354, "grad_norm": 0.39492252469062805, "learning_rate": 0.00014539904997395468, "loss": 1.5264, "step": 48 }, { "epoch": 0.08000392176087064, "grad_norm": 0.39957186579704285, "learning_rate": 0.0001346117057077493, "loss": 1.5242, "step": 51 }, { "epoch": 0.08471003480562772, "grad_norm": 0.3886667490005493, "learning_rate": 0.00012334453638559057, "loss": 1.4754, "step": 54 }, { "epoch": 0.08471003480562772, "eval_loss": 1.5080097913742065, "eval_runtime": 148.2053, "eval_samples_per_second": 14.493, "eval_steps_per_second": 7.247, "step": 54 }, { "epoch": 0.08941614785038482, "grad_norm": 0.3844744861125946, "learning_rate": 0.00011175373974578378, "loss": 1.4846, "step": 57 }, { "epoch": 0.09412226089514192, "grad_norm": 0.37326759099960327, "learning_rate": 0.0001, "loss": 1.4674, "step": 60 }, { "epoch": 0.09882837393989902, "grad_norm": 0.3983359634876251, "learning_rate": 8.824626025421626e-05, "loss": 1.4584, "step": 63 }, { "epoch": 0.09882837393989902, "eval_loss": 1.5008231401443481, "eval_runtime": 148.4011, "eval_samples_per_second": 14.474, "eval_steps_per_second": 7.237, "step": 63 }, { "epoch": 0.10353448698465612, "grad_norm": 0.3726574778556824, "learning_rate": 7.66554636144095e-05, "loss": 1.4221, "step": 66 }, { "epoch": 0.1082406000294132, "grad_norm": 0.3979525566101074, "learning_rate": 6.538829429225069e-05, "loss": 1.5156, "step": 69 }, { "epoch": 0.1129467130741703, "grad_norm": 0.3521616756916046, "learning_rate": 5.4600950026045326e-05, "loss": 1.4337, "step": 72 }, { "epoch": 0.1129467130741703, "eval_loss": 1.4966624975204468, "eval_runtime": 137.8159, "eval_samples_per_second": 15.586, "eval_steps_per_second": 7.793, "step": 72 }, { "epoch": 0.1176528261189274, "grad_norm": 0.3657877743244171, "learning_rate": 4.444297669803981e-05, "loss": 1.5035, "step": 75 }, { "epoch": 0.1223589391636845, "grad_norm": 0.3593926727771759, "learning_rate": 3.5055195166981645e-05, "loss": 1.4785, "step": 78 }, { "epoch": 0.12706505220844158, "grad_norm": 0.37910911440849304, "learning_rate": 2.6567749056431467e-05, "loss": 1.5454, "step": 81 }, { "epoch": 0.12706505220844158, "eval_loss": 1.4934673309326172, "eval_runtime": 137.7475, "eval_samples_per_second": 15.594, "eval_steps_per_second": 7.797, "step": 81 }, { "epoch": 0.13177116525319868, "grad_norm": 0.38638541102409363, "learning_rate": 1.9098300562505266e-05, "loss": 1.4935, "step": 84 }, { "epoch": 0.13647727829795578, "grad_norm": 0.3478032350540161, "learning_rate": 1.2750399292720283e-05, "loss": 1.4452, "step": 87 }, { "epoch": 0.14118339134271288, "grad_norm": 0.34858161211013794, "learning_rate": 7.612046748871327e-06, "loss": 1.4752, "step": 90 }, { "epoch": 0.14118339134271288, "eval_loss": 1.492301106452942, "eval_runtime": 148.3193, "eval_samples_per_second": 14.482, "eval_steps_per_second": 7.241, "step": 90 }, { "epoch": 0.14588950438746998, "grad_norm": 0.37905701994895935, "learning_rate": 3.7544763546352834e-06, "loss": 1.5333, "step": 93 }, { "epoch": 0.15059561743222707, "grad_norm": 0.37757179141044617, "learning_rate": 1.231165940486234e-06, "loss": 1.4955, "step": 96 }, { "epoch": 0.15530173047698417, "grad_norm": 0.3686583936214447, "learning_rate": 7.709637592770991e-08, "loss": 1.4875, "step": 99 }, { "epoch": 0.15530173047698417, "eval_loss": 1.4919251203536987, "eval_runtime": 145.2178, "eval_samples_per_second": 14.792, "eval_steps_per_second": 7.396, "step": 99 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.589828125877862e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }