{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9977298524404086, "eval_steps": 500, "global_step": 1174, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04256526674233825, "grad_norm": 0.23498961329460144, "learning_rate": 0.00019657827202737384, "loss": 0.1046, "step": 25 }, { "epoch": 0.0851305334846765, "grad_norm": 0.14751878380775452, "learning_rate": 0.00019230111206159113, "loss": 0.0926, "step": 50 }, { "epoch": 0.12769580022701477, "grad_norm": 0.2522989511489868, "learning_rate": 0.0001880239520958084, "loss": 0.0971, "step": 75 }, { "epoch": 0.170261066969353, "grad_norm": 0.15176503360271454, "learning_rate": 0.00018374679213002568, "loss": 0.0928, "step": 100 }, { "epoch": 0.21282633371169127, "grad_norm": 0.16757148504257202, "learning_rate": 0.00017946963216424296, "loss": 0.0916, "step": 125 }, { "epoch": 0.25539160045402953, "grad_norm": 0.2590785026550293, "learning_rate": 0.00017519247219846022, "loss": 0.0923, "step": 150 }, { "epoch": 0.29795686719636777, "grad_norm": 0.19378352165222168, "learning_rate": 0.0001709153122326775, "loss": 0.0921, "step": 175 }, { "epoch": 0.340522133938706, "grad_norm": 0.2084362953901291, "learning_rate": 0.0001666381522668948, "loss": 0.0888, "step": 200 }, { "epoch": 0.38308740068104424, "grad_norm": 0.17699547111988068, "learning_rate": 0.00016236099230111209, "loss": 0.0898, "step": 225 }, { "epoch": 0.42565266742338254, "grad_norm": 0.1759597808122635, "learning_rate": 0.00015808383233532935, "loss": 0.0868, "step": 250 }, { "epoch": 0.4682179341657208, "grad_norm": 0.1610288918018341, "learning_rate": 0.00015380667236954663, "loss": 0.0907, "step": 275 }, { "epoch": 0.5107832009080591, "grad_norm": 0.13977083563804626, "learning_rate": 0.00014952951240376392, "loss": 0.086, "step": 300 }, { "epoch": 0.5533484676503972, "grad_norm": 0.18051184713840485, "learning_rate": 0.00014525235243798118, "loss": 0.0831, "step": 325 }, { "epoch": 0.5959137343927355, "grad_norm": 0.2083287537097931, "learning_rate": 0.00014097519247219847, "loss": 0.086, "step": 350 }, { "epoch": 0.6384790011350738, "grad_norm": 0.1912999302148819, "learning_rate": 0.00013669803250641576, "loss": 0.0839, "step": 375 }, { "epoch": 0.681044267877412, "grad_norm": 0.17170144617557526, "learning_rate": 0.00013242087254063302, "loss": 0.0848, "step": 400 }, { "epoch": 0.7236095346197503, "grad_norm": 0.2047215849161148, "learning_rate": 0.0001281437125748503, "loss": 0.079, "step": 425 }, { "epoch": 0.7661748013620885, "grad_norm": 0.14967381954193115, "learning_rate": 0.0001238665526090676, "loss": 0.0844, "step": 450 }, { "epoch": 0.8087400681044268, "grad_norm": 0.15834014117717743, "learning_rate": 0.00011958939264328485, "loss": 0.0821, "step": 475 }, { "epoch": 0.8513053348467651, "grad_norm": 0.16844715178012848, "learning_rate": 0.00011531223267750215, "loss": 0.0836, "step": 500 }, { "epoch": 0.8938706015891033, "grad_norm": 0.18447309732437134, "learning_rate": 0.00011103507271171943, "loss": 0.0829, "step": 525 }, { "epoch": 0.9364358683314415, "grad_norm": 0.1440054476261139, "learning_rate": 0.00010675791274593669, "loss": 0.0792, "step": 550 }, { "epoch": 0.9790011350737798, "grad_norm": 0.17830029129981995, "learning_rate": 0.00010248075278015399, "loss": 0.0821, "step": 575 }, { "epoch": 1.0204313280363224, "grad_norm": 0.2284756600856781, "learning_rate": 9.820359281437126e-05, "loss": 0.0793, "step": 600 }, { "epoch": 1.0629965947786606, "grad_norm": 
0.15237772464752197, "learning_rate": 9.392643284858854e-05, "loss": 0.0771, "step": 625 }, { "epoch": 1.1055618615209988, "grad_norm": 0.18322055041790009, "learning_rate": 8.964927288280582e-05, "loss": 0.0815, "step": 650 }, { "epoch": 1.1481271282633372, "grad_norm": 0.17401781678199768, "learning_rate": 8.537211291702311e-05, "loss": 0.0826, "step": 675 }, { "epoch": 1.1906923950056754, "grad_norm": 0.23275992274284363, "learning_rate": 8.109495295124037e-05, "loss": 0.0769, "step": 700 }, { "epoch": 1.2332576617480135, "grad_norm": 0.23144161701202393, "learning_rate": 7.681779298545766e-05, "loss": 0.0761, "step": 725 }, { "epoch": 1.275822928490352, "grad_norm": 0.2251117080450058, "learning_rate": 7.254063301967495e-05, "loss": 0.0768, "step": 750 }, { "epoch": 1.3183881952326901, "grad_norm": 0.18892104923725128, "learning_rate": 6.826347305389222e-05, "loss": 0.0787, "step": 775 }, { "epoch": 1.3609534619750283, "grad_norm": 0.19995813071727753, "learning_rate": 6.39863130881095e-05, "loss": 0.0725, "step": 800 }, { "epoch": 1.4035187287173665, "grad_norm": 0.19511419534683228, "learning_rate": 5.9709153122326776e-05, "loss": 0.0729, "step": 825 }, { "epoch": 1.446083995459705, "grad_norm": 0.2229546159505844, "learning_rate": 5.543199315654406e-05, "loss": 0.0721, "step": 850 }, { "epoch": 1.488649262202043, "grad_norm": 0.2024296671152115, "learning_rate": 5.1154833190761344e-05, "loss": 0.0727, "step": 875 }, { "epoch": 1.5312145289443815, "grad_norm": 0.24508166313171387, "learning_rate": 4.687767322497862e-05, "loss": 0.071, "step": 900 }, { "epoch": 1.5737797956867197, "grad_norm": 0.2000231146812439, "learning_rate": 4.260051325919589e-05, "loss": 0.0722, "step": 925 }, { "epoch": 1.6163450624290578, "grad_norm": 0.16820313036441803, "learning_rate": 3.832335329341318e-05, "loss": 0.07, "step": 950 }, { "epoch": 1.658910329171396, "grad_norm": 0.17715366184711456, "learning_rate": 3.4046193327630454e-05, "loss": 0.0552, "step": 975 }, { "epoch": 1.7014755959137344, "grad_norm": 0.17357434332370758, "learning_rate": 2.9769033361847738e-05, "loss": 0.0708, "step": 1000 }, { "epoch": 1.7440408626560726, "grad_norm": 0.19597357511520386, "learning_rate": 2.5491873396065015e-05, "loss": 0.0618, "step": 1025 }, { "epoch": 1.786606129398411, "grad_norm": 0.2510164976119995, "learning_rate": 2.1214713430282293e-05, "loss": 0.068, "step": 1050 }, { "epoch": 1.8291713961407492, "grad_norm": 0.30005839467048645, "learning_rate": 1.6937553464499573e-05, "loss": 0.0672, "step": 1075 }, { "epoch": 1.8717366628830874, "grad_norm": 0.19194893538951874, "learning_rate": 1.2660393498716852e-05, "loss": 0.0681, "step": 1100 }, { "epoch": 1.9143019296254256, "grad_norm": 0.16069871187210083, "learning_rate": 8.383233532934131e-06, "loss": 0.0682, "step": 1125 }, { "epoch": 1.9568671963677637, "grad_norm": 0.22674943506717682, "learning_rate": 4.106073567151412e-06, "loss": 0.0686, "step": 1150 } ], "logging_steps": 25, "max_steps": 1174, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.90004424169257e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }