|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.934131736526946,
  "eval_steps": 500,
  "global_step": 123,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11976047904191617,
      "grad_norm": 3.5740466117858887,
      "learning_rate": 4.979641338636935e-05,
      "loss": 2.7298,
      "num_input_tokens_seen": 7080,
      "step": 5
    },
    {
      "epoch": 0.23952095808383234,
      "grad_norm": 3.4822659492492676,
      "learning_rate": 4.918896934621734e-05,
      "loss": 1.963,
      "num_input_tokens_seen": 14016,
      "step": 10
    },
    {
      "epoch": 0.3592814371257485,
      "grad_norm": 3.181272268295288,
      "learning_rate": 4.8187561277552374e-05,
      "loss": 1.3739,
      "num_input_tokens_seen": 21024,
      "step": 15
    },
    {
      "epoch": 0.47904191616766467,
      "grad_norm": 2.162691593170166,
      "learning_rate": 4.680849904257938e-05,
      "loss": 0.9519,
      "num_input_tokens_seen": 27864,
      "step": 20
    },
    {
      "epoch": 0.5988023952095808,
      "grad_norm": 2.1968019008636475,
      "learning_rate": 4.507424333013069e-05,
      "loss": 0.7551,
      "num_input_tokens_seen": 34848,
      "step": 25
    },
    {
      "epoch": 0.718562874251497,
      "grad_norm": 1.3013643026351929,
      "learning_rate": 4.301303984001967e-05,
      "loss": 0.6738,
      "num_input_tokens_seen": 41496,
      "step": 30
    },
    {
      "epoch": 0.8383233532934131,
      "grad_norm": 1.2600340843200684,
      "learning_rate": 4.0658459247330766e-05,
      "loss": 0.6185,
      "num_input_tokens_seen": 48360,
      "step": 35
    },
    {
      "epoch": 0.9580838323353293,
      "grad_norm": 1.2103824615478516,
      "learning_rate": 3.8048850439214844e-05,
      "loss": 0.587,
      "num_input_tokens_seen": 55512,
      "step": 40
    },
    {
      "epoch": 1.0718562874251496,
      "grad_norm": 1.5621482133865356,
      "learning_rate": 3.5226715929283506e-05,
      "loss": 0.4555,
      "num_input_tokens_seen": 61968,
      "step": 45
    },
    {
      "epoch": 1.1916167664670658,
      "grad_norm": 1.1218465566635132,
      "learning_rate": 3.223801962218372e-05,
      "loss": 0.5047,
      "num_input_tokens_seen": 68544,
      "step": 50
    },
    {
      "epoch": 1.311377245508982,
      "grad_norm": 0.9951040744781494,
      "learning_rate": 2.9131438202742124e-05,
      "loss": 0.453,
      "num_input_tokens_seen": 75456,
      "step": 55
    },
    {
      "epoch": 1.4311377245508983,
      "grad_norm": 1.395470380783081,
      "learning_rate": 2.595756834225089e-05,
      "loss": 0.5011,
      "num_input_tokens_seen": 82152,
      "step": 60
    },
    {
      "epoch": 1.5508982035928143,
      "grad_norm": 1.1427335739135742,
      "learning_rate": 2.2768102634070147e-05,
      "loss": 0.4825,
      "num_input_tokens_seen": 89352,
      "step": 65
    },
    {
      "epoch": 1.6706586826347305,
      "grad_norm": 1.1576189994812012,
      "learning_rate": 1.961498768002547e-05,
      "loss": 0.5319,
      "num_input_tokens_seen": 96576,
      "step": 70
    },
    {
      "epoch": 1.7904191616766467,
      "grad_norm": 1.177241325378418,
      "learning_rate": 1.6549578039787436e-05,
      "loss": 0.4653,
      "num_input_tokens_seen": 103224,
      "step": 75
    },
    {
      "epoch": 1.910179640718563,
      "grad_norm": 1.015809178352356,
      "learning_rate": 1.3621799822799788e-05,
      "loss": 0.4396,
      "num_input_tokens_seen": 110376,
      "step": 80
    },
    {
      "epoch": 2.0239520958083834,
      "grad_norm": 1.1751824617385864,
      "learning_rate": 1.0879337545275165e-05,
      "loss": 0.3965,
      "num_input_tokens_seen": 116656,
      "step": 85
    },
    {
      "epoch": 2.143712574850299,
      "grad_norm": 1.0979323387145996,
      "learning_rate": 8.36685749586087e-06,
      "loss": 0.4697,
      "num_input_tokens_seen": 123904,
      "step": 90
    },
    {
      "epoch": 2.2634730538922154,
      "grad_norm": 1.0818545818328857,
      "learning_rate": 6.125280258962873e-06,
      "loss": 0.4242,
      "num_input_tokens_seen": 130552,
      "step": 95
    },
    {
      "epoch": 2.3832335329341316,
      "grad_norm": 1.2338745594024658,
      "learning_rate": 4.19111424408932e-06,
      "loss": 0.3814,
      "num_input_tokens_seen": 137200,
      "step": 100
    },
    {
      "epoch": 2.502994011976048,
      "grad_norm": 1.1963940858840942,
      "learning_rate": 2.595861075973613e-06,
      "loss": 0.3991,
      "num_input_tokens_seen": 143872,
      "step": 105
    },
    {
      "epoch": 2.622754491017964,
      "grad_norm": 1.2328342199325562,
      "learning_rate": 1.365502529846166e-06,
      "loss": 0.4522,
      "num_input_tokens_seen": 150832,
      "step": 110
    },
    {
      "epoch": 2.7425149700598803,
      "grad_norm": 1.3839339017868042,
      "learning_rate": 5.20077368103597e-07,
      "loss": 0.3812,
      "num_input_tokens_seen": 157672,
      "step": 115
    },
    {
      "epoch": 2.8622754491017965,
      "grad_norm": 1.190239667892456,
      "learning_rate": 7.335497040648898e-08,
      "loss": 0.4344,
      "num_input_tokens_seen": 164776,
      "step": 120
    },
    {
      "epoch": 2.934131736526946,
      "num_input_tokens_seen": 169048,
      "step": 123,
      "total_flos": 992767048187904.0,
      "train_loss": 0.696133964430026,
      "train_runtime": 82.5141,
      "train_samples_per_second": 36.357,
      "train_steps_per_second": 1.491
    }
  ],
  "logging_steps": 5,
  "max_steps": 123,
  "num_input_tokens_seen": 169048,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 992767048187904.0,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}
|
|