|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.002324842210416383, |
|
"eval_steps": 500, |
|
"global_step": 24, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.686842543401596e-05, |
|
"grad_norm": 0.4089602828025818, |
|
"learning_rate": 1.9230769230769234e-06, |
|
"loss": 2.0398, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00019373685086803192, |
|
"grad_norm": 0.4970704913139343, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 2.0532, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00029060527630204785, |
|
"grad_norm": 0.4048117995262146, |
|
"learning_rate": 5.76923076923077e-06, |
|
"loss": 2.1092, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00038747370173606384, |
|
"grad_norm": 0.41647177934646606, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 2.1701, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00048434212717007977, |
|
"grad_norm": 0.33691734075546265, |
|
"learning_rate": 9.615384615384616e-06, |
|
"loss": 2.0658, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005812105526040957, |
|
"grad_norm": 0.40251973271369934, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 2.0623, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0006780789780381117, |
|
"grad_norm": 0.32849112153053284, |
|
"learning_rate": 1.3461538461538462e-05, |
|
"loss": 2.0855, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0007749474034721277, |
|
"grad_norm": 0.29179856181144714, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 1.9376, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0008718158289061436, |
|
"grad_norm": 0.34249696135520935, |
|
"learning_rate": 1.730769230769231e-05, |
|
"loss": 2.0817, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0009686842543401595, |
|
"grad_norm": 0.25032880902290344, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 2.0218, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0010655526797741755, |
|
"grad_norm": 0.2710218131542206, |
|
"learning_rate": 2.1153846153846154e-05, |
|
"loss": 2.0107, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0011624211052081914, |
|
"grad_norm": 0.2562181055545807, |
|
"learning_rate": 2.307692307692308e-05, |
|
"loss": 1.8556, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0012592895306422073, |
|
"grad_norm": 0.15152186155319214, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.938, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0013561579560762234, |
|
"grad_norm": 0.19589252769947052, |
|
"learning_rate": 2.6923076923076923e-05, |
|
"loss": 1.9859, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0014530263815102393, |
|
"grad_norm": 0.17176692187786102, |
|
"learning_rate": 2.8846153846153845e-05, |
|
"loss": 1.8812, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0015498948069442554, |
|
"grad_norm": 0.18412314355373383, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 2.0042, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0016467632323782712, |
|
"grad_norm": 0.19615541398525238, |
|
"learning_rate": 3.269230769230769e-05, |
|
"loss": 1.9725, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.001743631657812287, |
|
"grad_norm": 0.18842636048793793, |
|
"learning_rate": 3.461538461538462e-05, |
|
"loss": 1.9555, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0018405000832463032, |
|
"grad_norm": 0.23347383737564087, |
|
"learning_rate": 3.653846153846154e-05, |
|
"loss": 1.9779, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.001937368508680319, |
|
"grad_norm": 0.19372476637363434, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 1.9203, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002034236934114335, |
|
"grad_norm": 0.15928150713443756, |
|
"learning_rate": 4.038461538461539e-05, |
|
"loss": 1.8459, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002131105359548351, |
|
"grad_norm": 0.18540354073047638, |
|
"learning_rate": 4.230769230769231e-05, |
|
"loss": 1.7937, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0022279737849823667, |
|
"grad_norm": 0.1321619600057602, |
|
"learning_rate": 4.423076923076923e-05, |
|
"loss": 1.8011, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.002324842210416383, |
|
"grad_norm": 0.1503838449716568, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 1.8784, |
|
"step": 24 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 10323, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8301560403240960.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|