chauhoang's picture
Training in progress, step 50, checkpoint
5cb3ee8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.18248175182481752,
"eval_steps": 10,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0036496350364963502,
"eval_loss": 2.616699695587158,
"eval_runtime": 6.3179,
"eval_samples_per_second": 18.361,
"eval_steps_per_second": 9.18,
"step": 1
},
{
"epoch": 0.01824817518248175,
"grad_norm": 1.6743769645690918,
"learning_rate": 5e-05,
"loss": 2.5534,
"step": 5
},
{
"epoch": 0.0364963503649635,
"grad_norm": 1.8287861347198486,
"learning_rate": 0.0001,
"loss": 2.3519,
"step": 10
},
{
"epoch": 0.0364963503649635,
"eval_loss": 2.002800464630127,
"eval_runtime": 6.3103,
"eval_samples_per_second": 18.383,
"eval_steps_per_second": 9.191,
"step": 10
},
{
"epoch": 0.05474452554744526,
"grad_norm": 2.0079925060272217,
"learning_rate": 9.619397662556435e-05,
"loss": 1.7359,
"step": 15
},
{
"epoch": 0.072992700729927,
"grad_norm": 2.5511863231658936,
"learning_rate": 8.535533905932738e-05,
"loss": 1.1344,
"step": 20
},
{
"epoch": 0.072992700729927,
"eval_loss": 0.8217958807945251,
"eval_runtime": 6.4298,
"eval_samples_per_second": 18.041,
"eval_steps_per_second": 9.021,
"step": 20
},
{
"epoch": 0.09124087591240876,
"grad_norm": 2.521385908126831,
"learning_rate": 6.91341716182545e-05,
"loss": 0.6589,
"step": 25
},
{
"epoch": 0.10948905109489052,
"grad_norm": 2.6327896118164062,
"learning_rate": 5e-05,
"loss": 0.3088,
"step": 30
},
{
"epoch": 0.10948905109489052,
"eval_loss": 0.17604775726795197,
"eval_runtime": 6.5101,
"eval_samples_per_second": 17.818,
"eval_steps_per_second": 8.909,
"step": 30
},
{
"epoch": 0.12773722627737227,
"grad_norm": 2.246385335922241,
"learning_rate": 3.086582838174551e-05,
"loss": 0.1334,
"step": 35
},
{
"epoch": 0.145985401459854,
"grad_norm": 1.123745322227478,
"learning_rate": 1.4644660940672627e-05,
"loss": 0.064,
"step": 40
},
{
"epoch": 0.145985401459854,
"eval_loss": 0.043108660727739334,
"eval_runtime": 6.4968,
"eval_samples_per_second": 17.855,
"eval_steps_per_second": 8.927,
"step": 40
},
{
"epoch": 0.16423357664233576,
"grad_norm": 1.209367036819458,
"learning_rate": 3.8060233744356633e-06,
"loss": 0.0385,
"step": 45
},
{
"epoch": 0.18248175182481752,
"grad_norm": 0.65992271900177,
"learning_rate": 0.0,
"loss": 0.0407,
"step": 50
},
{
"epoch": 0.18248175182481752,
"eval_loss": 0.03230349346995354,
"eval_runtime": 6.4146,
"eval_samples_per_second": 18.084,
"eval_steps_per_second": 9.042,
"step": 50
}
],
"logging_steps": 5,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 13,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2500486653542400.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}