{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2284122562674096, "eval_steps": 100, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.055710306406685235, "grad_norm": 13.33180221602099, "learning_rate": 1.8518518518518519e-06, "loss": 0.7002, "step": 10 }, { "epoch": 0.11142061281337047, "grad_norm": 1.6650732524504632, "learning_rate": 3.7037037037037037e-06, "loss": 0.2272, "step": 20 }, { "epoch": 0.1671309192200557, "grad_norm": 1.3298246517786452, "learning_rate": 5.555555555555557e-06, "loss": 0.1798, "step": 30 }, { "epoch": 0.22284122562674094, "grad_norm": 1.5154839686376809, "learning_rate": 7.4074074074074075e-06, "loss": 0.1663, "step": 40 }, { "epoch": 0.2785515320334262, "grad_norm": 1.4493281388620185, "learning_rate": 9.25925925925926e-06, "loss": 0.1698, "step": 50 }, { "epoch": 0.3342618384401114, "grad_norm": 1.4507180686803252, "learning_rate": 9.99619291237835e-06, "loss": 0.1547, "step": 60 }, { "epoch": 0.38997214484679665, "grad_norm": 1.408311153993914, "learning_rate": 9.972948368899302e-06, "loss": 0.1573, "step": 70 }, { "epoch": 0.4456824512534819, "grad_norm": 1.411285032932152, "learning_rate": 9.928672515804434e-06, "loss": 0.1651, "step": 80 }, { "epoch": 0.5013927576601671, "grad_norm": 1.5921373458809418, "learning_rate": 9.863552602006435e-06, "loss": 0.1564, "step": 90 }, { "epoch": 0.5571030640668524, "grad_norm": 1.1123208209611752, "learning_rate": 9.777864028930705e-06, "loss": 0.1599, "step": 100 }, { "epoch": 0.5571030640668524, "eval_loss": 0.14965404570102692, "eval_runtime": 14.3523, "eval_samples_per_second": 42.084, "eval_steps_per_second": 1.324, "step": 100 }, { "epoch": 0.6128133704735376, "grad_norm": 1.3165488984028846, "learning_rate": 9.671969185803357e-06, "loss": 0.1539, "step": 110 }, { "epoch": 0.6685236768802229, "grad_norm": 1.2598963561948195, "learning_rate": 9.546315917055362e-06, "loss": 0.158, "step": 120 }, { "epoch": 0.724233983286908, "grad_norm": 1.2565192353667611, "learning_rate": 9.401435628324436e-06, "loss": 0.172, "step": 130 }, { "epoch": 0.7799442896935933, "grad_norm": 1.3015497330703336, "learning_rate": 9.237941039064606e-06, "loss": 0.1604, "step": 140 }, { "epoch": 0.8356545961002786, "grad_norm": 1.3910423306638404, "learning_rate": 9.056523591268064e-06, "loss": 0.1552, "step": 150 }, { "epoch": 0.8913649025069638, "grad_norm": 1.1494310395657203, "learning_rate": 8.85795052525811e-06, "loss": 0.1623, "step": 160 }, { "epoch": 0.947075208913649, "grad_norm": 1.1057721484907344, "learning_rate": 8.643061634920146e-06, "loss": 0.159, "step": 170 }, { "epoch": 1.0027855153203342, "grad_norm": 1.4188078858125839, "learning_rate": 8.412765716093273e-06, "loss": 0.1627, "step": 180 }, { "epoch": 1.0584958217270195, "grad_norm": 0.7623378341371474, "learning_rate": 8.168036723142743e-06, "loss": 0.0859, "step": 190 }, { "epoch": 1.1142061281337048, "grad_norm": 1.0235514533508279, "learning_rate": 7.909909649967735e-06, "loss": 0.0851, "step": 200 }, { "epoch": 1.1142061281337048, "eval_loss": 0.16079136729240417, "eval_runtime": 13.581, "eval_samples_per_second": 44.474, "eval_steps_per_second": 1.399, "step": 200 }, { "epoch": 1.16991643454039, "grad_norm": 0.8096480773426198, "learning_rate": 7.639476152864163e-06, "loss": 0.0793, "step": 210 }, { "epoch": 1.2256267409470751, "grad_norm": 0.8281128819137961, "learning_rate": 7.35787993375414e-06, "loss": 0.0812, "step": 220 }, { "epoch": 1.2813370473537604, "grad_norm": 0.9370312703563903, "learning_rate": 7.066311903307033e-06, "loss": 0.0791, "step": 230 }, { "epoch": 1.3370473537604457, "grad_norm": 0.9053649627789484, "learning_rate": 6.76600514440799e-06, "loss": 0.084, "step": 240 }, { "epoch": 1.392757660167131, "grad_norm": 0.9028081686357128, "learning_rate": 6.458229697274125e-06, "loss": 0.0872, "step": 250 }, { "epoch": 1.448467966573816, "grad_norm": 0.8100454278966028, "learning_rate": 6.144287188272867e-06, "loss": 0.0845, "step": 260 }, { "epoch": 1.5041782729805013, "grad_norm": 1.1552403857017284, "learning_rate": 5.825505325157962e-06, "loss": 0.0837, "step": 270 }, { "epoch": 1.5598885793871866, "grad_norm": 0.8948418601771239, "learning_rate": 5.503232282003569e-06, "loss": 0.0828, "step": 280 }, { "epoch": 1.615598885793872, "grad_norm": 0.8307633796010723, "learning_rate": 5.178830997583353e-06, "loss": 0.0839, "step": 290 }, { "epoch": 1.6713091922005572, "grad_norm": 0.9993028011882762, "learning_rate": 4.853673411307564e-06, "loss": 0.0793, "step": 300 }, { "epoch": 1.6713091922005572, "eval_loss": 0.15588445961475372, "eval_runtime": 13.5881, "eval_samples_per_second": 44.451, "eval_steps_per_second": 1.398, "step": 300 }, { "epoch": 1.7270194986072425, "grad_norm": 1.029481086800917, "learning_rate": 4.529134661095114e-06, "loss": 0.0755, "step": 310 }, { "epoch": 1.7827298050139275, "grad_norm": 0.8067395371226609, "learning_rate": 4.206587267718743e-06, "loss": 0.0827, "step": 320 }, { "epoch": 1.8384401114206128, "grad_norm": 0.9301538402093087, "learning_rate": 3.887395330218429e-06, "loss": 0.0825, "step": 330 }, { "epoch": 1.894150417827298, "grad_norm": 0.8980361744773078, "learning_rate": 3.5729087569315284e-06, "loss": 0.0817, "step": 340 }, { "epoch": 1.9498607242339832, "grad_norm": 0.9129952399332847, "learning_rate": 3.26445755653744e-06, "loss": 0.0856, "step": 350 }, { "epoch": 2.0055710306406684, "grad_norm": 0.5738192618476439, "learning_rate": 2.963346213260737e-06, "loss": 0.0829, "step": 360 }, { "epoch": 2.0612813370473537, "grad_norm": 0.6386795168831956, "learning_rate": 2.6708481700208954e-06, "loss": 0.0314, "step": 370 }, { "epoch": 2.116991643454039, "grad_norm": 0.8423736663221459, "learning_rate": 2.3882004428601213e-06, "loss": 0.0273, "step": 380 }, { "epoch": 2.1727019498607243, "grad_norm": 0.696882651835955, "learning_rate": 2.1165983894256647e-06, "loss": 0.0301, "step": 390 }, { "epoch": 2.2284122562674096, "grad_norm": 0.9565822826782887, "learning_rate": 1.8571906536314233e-06, "loss": 0.0275, "step": 400 }, { "epoch": 2.2284122562674096, "eval_loss": 0.19155950844287872, "eval_runtime": 13.6617, "eval_samples_per_second": 44.211, "eval_steps_per_second": 1.391, "step": 400 } ], "logging_steps": 10, "max_steps": 537, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 67047933214720.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }