{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14245690678569733, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0047485635595232445, "grad_norm": 0.3049672544002533, "learning_rate": 8.333333333333334e-06, "loss": 0.623, "step": 100 }, { "epoch": 0.009497127119046489, "grad_norm": 0.09646450728178024, "learning_rate": 1.6666666666666667e-05, "loss": 0.29, "step": 200 }, { "epoch": 0.014245690678569733, "grad_norm": 0.16680897772312164, "learning_rate": 2.5e-05, "loss": 0.1391, "step": 300 }, { "epoch": 0.018994254238092978, "grad_norm": 0.17046234011650085, "learning_rate": 3.3333333333333335e-05, "loss": 0.1325, "step": 400 }, { "epoch": 0.023742817797616222, "grad_norm": 0.08646216243505478, "learning_rate": 4.166666666666667e-05, "loss": 0.1238, "step": 500 }, { "epoch": 0.028491381357139467, "grad_norm": 0.09403830766677856, "learning_rate": 5e-05, "loss": 0.1128, "step": 600 }, { "epoch": 0.03323994491666271, "grad_norm": 0.08856412768363953, "learning_rate": 4.9786121534345265e-05, "loss": 0.1134, "step": 700 }, { "epoch": 0.037988508476185956, "grad_norm": 0.1673099398612976, "learning_rate": 4.914814565722671e-05, "loss": 0.1108, "step": 800 }, { "epoch": 0.0427370720357092, "grad_norm": 0.07016578316688538, "learning_rate": 4.8096988312782174e-05, "loss": 0.1088, "step": 900 }, { "epoch": 0.047485635595232445, "grad_norm": 0.07002697139978409, "learning_rate": 4.665063509461097e-05, "loss": 0.1096, "step": 1000 }, { "epoch": 0.05223419915475569, "grad_norm": 0.16299889981746674, "learning_rate": 4.4833833507280884e-05, "loss": 0.1071, "step": 1100 }, { "epoch": 0.056982762714278934, "grad_norm": 0.06416748464107513, "learning_rate": 4.267766952966369e-05, "loss": 0.107, "step": 1200 }, { "epoch": 0.06173132627380217, "grad_norm": 0.06463466584682465, "learning_rate": 4.021903572521802e-05, "loss": 0.1062, "step": 1300 }, { "epoch": 0.06647988983332542, "grad_norm": 0.07041608542203903, "learning_rate": 3.7500000000000003e-05, "loss": 0.1065, "step": 1400 }, { "epoch": 0.07122845339284867, "grad_norm": 0.07932303100824356, "learning_rate": 3.456708580912725e-05, "loss": 0.1044, "step": 1500 }, { "epoch": 0.07597701695237191, "grad_norm": 0.061066512018442154, "learning_rate": 3.147047612756302e-05, "loss": 0.1034, "step": 1600 }, { "epoch": 0.08072558051189516, "grad_norm": 0.06134779006242752, "learning_rate": 2.8263154805501297e-05, "loss": 0.1034, "step": 1700 }, { "epoch": 0.0854741440714184, "grad_norm": 0.06713131070137024, "learning_rate": 2.5e-05, "loss": 0.1049, "step": 1800 }, { "epoch": 0.09022270763094165, "grad_norm": 0.09950446337461472, "learning_rate": 2.173684519449872e-05, "loss": 0.105, "step": 1900 }, { "epoch": 0.09497127119046489, "grad_norm": 0.06452978402376175, "learning_rate": 1.852952387243698e-05, "loss": 0.1054, "step": 2000 }, { "epoch": 0.09971983474998813, "grad_norm": 0.11814086139202118, "learning_rate": 1.5432914190872757e-05, "loss": 0.1031, "step": 2100 }, { "epoch": 0.10446839830951138, "grad_norm": 0.08128858357667923, "learning_rate": 1.2500000000000006e-05, "loss": 0.103, "step": 2200 }, { "epoch": 0.10921696186903462, "grad_norm": 0.07054319977760315, "learning_rate": 9.780964274781984e-06, "loss": 0.103, "step": 2300 }, { "epoch": 0.11396552542855787, "grad_norm": 0.06334717571735382, "learning_rate": 7.3223304703363135e-06, "loss": 0.1034, "step": 2400 }, { "epoch": 0.11871408898808111, "grad_norm": 0.07204329967498779, "learning_rate": 5.166166492719124e-06, "loss": 0.1059, "step": 2500 }, { "epoch": 0.12346265254760434, "grad_norm": 0.05405697599053383, "learning_rate": 3.3493649053890326e-06, "loss": 0.105, "step": 2600 }, { "epoch": 0.1282112161071276, "grad_norm": 0.06032924726605415, "learning_rate": 1.9030116872178316e-06, "loss": 0.1009, "step": 2700 }, { "epoch": 0.13295977966665085, "grad_norm": 0.07012154161930084, "learning_rate": 8.51854342773295e-07, "loss": 0.1042, "step": 2800 }, { "epoch": 0.1377083432261741, "grad_norm": 0.07376914471387863, "learning_rate": 2.1387846565474045e-07, "loss": 0.103, "step": 2900 }, { "epoch": 0.14245690678569733, "grad_norm": 0.09159684926271439, "learning_rate": 0.0, "loss": 0.1014, "step": 3000 } ], "logging_steps": 100, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.658973782016e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }