ares / last-checkpoint /trainer_state.json
iamnguyen's picture
Training in progress, step 24, checkpoint
68149c6 verified
raw
history blame contribute delete
4.96 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.002324842210416383,
"eval_steps": 500,
"global_step": 24,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 9.686842543401596e-05,
"grad_norm": 0.4089602828025818,
"learning_rate": 1.9230769230769234e-06,
"loss": 2.0398,
"step": 1
},
{
"epoch": 0.00019373685086803192,
"grad_norm": 0.4970704913139343,
"learning_rate": 3.846153846153847e-06,
"loss": 2.0532,
"step": 2
},
{
"epoch": 0.00029060527630204785,
"grad_norm": 0.4048117995262146,
"learning_rate": 5.76923076923077e-06,
"loss": 2.1092,
"step": 3
},
{
"epoch": 0.00038747370173606384,
"grad_norm": 0.41647177934646606,
"learning_rate": 7.692307692307694e-06,
"loss": 2.1701,
"step": 4
},
{
"epoch": 0.00048434212717007977,
"grad_norm": 0.33691734075546265,
"learning_rate": 9.615384615384616e-06,
"loss": 2.0658,
"step": 5
},
{
"epoch": 0.0005812105526040957,
"grad_norm": 0.40251973271369934,
"learning_rate": 1.153846153846154e-05,
"loss": 2.0623,
"step": 6
},
{
"epoch": 0.0006780789780381117,
"grad_norm": 0.32849112153053284,
"learning_rate": 1.3461538461538462e-05,
"loss": 2.0855,
"step": 7
},
{
"epoch": 0.0007749474034721277,
"grad_norm": 0.29179856181144714,
"learning_rate": 1.5384615384615387e-05,
"loss": 1.9376,
"step": 8
},
{
"epoch": 0.0008718158289061436,
"grad_norm": 0.34249696135520935,
"learning_rate": 1.730769230769231e-05,
"loss": 2.0817,
"step": 9
},
{
"epoch": 0.0009686842543401595,
"grad_norm": 0.25032880902290344,
"learning_rate": 1.923076923076923e-05,
"loss": 2.0218,
"step": 10
},
{
"epoch": 0.0010655526797741755,
"grad_norm": 0.2710218131542206,
"learning_rate": 2.1153846153846154e-05,
"loss": 2.0107,
"step": 11
},
{
"epoch": 0.0011624211052081914,
"grad_norm": 0.2562181055545807,
"learning_rate": 2.307692307692308e-05,
"loss": 1.8556,
"step": 12
},
{
"epoch": 0.0012592895306422073,
"grad_norm": 0.15152186155319214,
"learning_rate": 2.5e-05,
"loss": 1.938,
"step": 13
},
{
"epoch": 0.0013561579560762234,
"grad_norm": 0.19589252769947052,
"learning_rate": 2.6923076923076923e-05,
"loss": 1.9859,
"step": 14
},
{
"epoch": 0.0014530263815102393,
"grad_norm": 0.17176692187786102,
"learning_rate": 2.8846153846153845e-05,
"loss": 1.8812,
"step": 15
},
{
"epoch": 0.0015498948069442554,
"grad_norm": 0.18412314355373383,
"learning_rate": 3.0769230769230774e-05,
"loss": 2.0042,
"step": 16
},
{
"epoch": 0.0016467632323782712,
"grad_norm": 0.19615541398525238,
"learning_rate": 3.269230769230769e-05,
"loss": 1.9725,
"step": 17
},
{
"epoch": 0.001743631657812287,
"grad_norm": 0.18842636048793793,
"learning_rate": 3.461538461538462e-05,
"loss": 1.9555,
"step": 18
},
{
"epoch": 0.0018405000832463032,
"grad_norm": 0.23347383737564087,
"learning_rate": 3.653846153846154e-05,
"loss": 1.9779,
"step": 19
},
{
"epoch": 0.001937368508680319,
"grad_norm": 0.19372476637363434,
"learning_rate": 3.846153846153846e-05,
"loss": 1.9203,
"step": 20
},
{
"epoch": 0.002034236934114335,
"grad_norm": 0.15928150713443756,
"learning_rate": 4.038461538461539e-05,
"loss": 1.8459,
"step": 21
},
{
"epoch": 0.002131105359548351,
"grad_norm": 0.18540354073047638,
"learning_rate": 4.230769230769231e-05,
"loss": 1.7937,
"step": 22
},
{
"epoch": 0.0022279737849823667,
"grad_norm": 0.1321619600057602,
"learning_rate": 4.423076923076923e-05,
"loss": 1.8011,
"step": 23
},
{
"epoch": 0.002324842210416383,
"grad_norm": 0.1503838449716568,
"learning_rate": 4.615384615384616e-05,
"loss": 1.8784,
"step": 24
}
],
"logging_steps": 1.0,
"max_steps": 10323,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 4,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8301560403240960.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}