{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.934131736526946, "eval_steps": 500, "global_step": 123, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11976047904191617, "grad_norm": 3.5740466117858887, "learning_rate": 4.979641338636935e-05, "loss": 2.7298, "num_input_tokens_seen": 7080, "step": 5 }, { "epoch": 0.23952095808383234, "grad_norm": 3.4822659492492676, "learning_rate": 4.918896934621734e-05, "loss": 1.963, "num_input_tokens_seen": 14016, "step": 10 }, { "epoch": 0.3592814371257485, "grad_norm": 3.181272268295288, "learning_rate": 4.8187561277552374e-05, "loss": 1.3739, "num_input_tokens_seen": 21024, "step": 15 }, { "epoch": 0.47904191616766467, "grad_norm": 2.162691593170166, "learning_rate": 4.680849904257938e-05, "loss": 0.9519, "num_input_tokens_seen": 27864, "step": 20 }, { "epoch": 0.5988023952095808, "grad_norm": 2.1968019008636475, "learning_rate": 4.507424333013069e-05, "loss": 0.7551, "num_input_tokens_seen": 34848, "step": 25 }, { "epoch": 0.718562874251497, "grad_norm": 1.3013643026351929, "learning_rate": 4.301303984001967e-05, "loss": 0.6738, "num_input_tokens_seen": 41496, "step": 30 }, { "epoch": 0.8383233532934131, "grad_norm": 1.2600340843200684, "learning_rate": 4.0658459247330766e-05, "loss": 0.6185, "num_input_tokens_seen": 48360, "step": 35 }, { "epoch": 0.9580838323353293, "grad_norm": 1.2103824615478516, "learning_rate": 3.8048850439214844e-05, "loss": 0.587, "num_input_tokens_seen": 55512, "step": 40 }, { "epoch": 1.0718562874251496, "grad_norm": 1.5621482133865356, "learning_rate": 3.5226715929283506e-05, "loss": 0.4555, "num_input_tokens_seen": 61968, "step": 45 }, { "epoch": 1.1916167664670658, "grad_norm": 1.1218465566635132, "learning_rate": 3.223801962218372e-05, "loss": 0.5047, "num_input_tokens_seen": 68544, "step": 50 }, { "epoch": 1.311377245508982, "grad_norm": 0.9951040744781494, "learning_rate": 2.9131438202742124e-05, "loss": 0.453, "num_input_tokens_seen": 75456, "step": 55 }, { "epoch": 1.4311377245508983, "grad_norm": 1.395470380783081, "learning_rate": 2.595756834225089e-05, "loss": 0.5011, "num_input_tokens_seen": 82152, "step": 60 }, { "epoch": 1.5508982035928143, "grad_norm": 1.1427335739135742, "learning_rate": 2.2768102634070147e-05, "loss": 0.4825, "num_input_tokens_seen": 89352, "step": 65 }, { "epoch": 1.6706586826347305, "grad_norm": 1.1576189994812012, "learning_rate": 1.961498768002547e-05, "loss": 0.5319, "num_input_tokens_seen": 96576, "step": 70 }, { "epoch": 1.7904191616766467, "grad_norm": 1.177241325378418, "learning_rate": 1.6549578039787436e-05, "loss": 0.4653, "num_input_tokens_seen": 103224, "step": 75 }, { "epoch": 1.910179640718563, "grad_norm": 1.015809178352356, "learning_rate": 1.3621799822799788e-05, "loss": 0.4396, "num_input_tokens_seen": 110376, "step": 80 }, { "epoch": 2.0239520958083834, "grad_norm": 1.1751824617385864, "learning_rate": 1.0879337545275165e-05, "loss": 0.3965, "num_input_tokens_seen": 116656, "step": 85 }, { "epoch": 2.143712574850299, "grad_norm": 1.0979323387145996, "learning_rate": 8.36685749586087e-06, "loss": 0.4697, "num_input_tokens_seen": 123904, "step": 90 }, { "epoch": 2.2634730538922154, "grad_norm": 1.0818545818328857, "learning_rate": 6.125280258962873e-06, "loss": 0.4242, "num_input_tokens_seen": 130552, "step": 95 }, { "epoch": 2.3832335329341316, "grad_norm": 1.2338745594024658, "learning_rate": 4.19111424408932e-06, "loss": 0.3814, "num_input_tokens_seen": 137200, "step": 100 }, { "epoch": 2.502994011976048, "grad_norm": 1.1963940858840942, "learning_rate": 2.595861075973613e-06, "loss": 0.3991, "num_input_tokens_seen": 143872, "step": 105 }, { "epoch": 2.622754491017964, "grad_norm": 1.2328342199325562, "learning_rate": 1.365502529846166e-06, "loss": 0.4522, "num_input_tokens_seen": 150832, "step": 110 }, { "epoch": 2.7425149700598803, "grad_norm": 1.3839339017868042, "learning_rate": 5.20077368103597e-07, "loss": 0.3812, "num_input_tokens_seen": 157672, "step": 115 }, { "epoch": 2.8622754491017965, "grad_norm": 1.190239667892456, "learning_rate": 7.335497040648898e-08, "loss": 0.4344, "num_input_tokens_seen": 164776, "step": 120 }, { "epoch": 2.934131736526946, "num_input_tokens_seen": 169048, "step": 123, "total_flos": 992767048187904.0, "train_loss": 0.696133964430026, "train_runtime": 82.5141, "train_samples_per_second": 36.357, "train_steps_per_second": 1.491 } ], "logging_steps": 5, "max_steps": 123, "num_input_tokens_seen": 169048, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 992767048187904.0, "train_batch_size": 3, "trial_name": null, "trial_params": null }