hiepnkv's picture
Upload folder using huggingface_hub
32610ca verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.5006134969325153,
"eval_steps": 51,
"global_step": 153,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05889570552147239,
"grad_norm": 0.17814360558986664,
"learning_rate": 9.803921568627451e-06,
"loss": 0.1429,
"step": 6
},
{
"epoch": 0.11779141104294479,
"grad_norm": 0.19341157376766205,
"learning_rate": 2.1568627450980395e-05,
"loss": 0.1421,
"step": 12
},
{
"epoch": 0.17668711656441718,
"grad_norm": 0.21557286381721497,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.1137,
"step": 18
},
{
"epoch": 0.23558282208588957,
"grad_norm": 0.0923461839556694,
"learning_rate": 4.5098039215686275e-05,
"loss": 0.0903,
"step": 24
},
{
"epoch": 0.294478527607362,
"grad_norm": 0.0747547447681427,
"learning_rate": 5.6862745098039215e-05,
"loss": 0.0541,
"step": 30
},
{
"epoch": 0.35337423312883437,
"grad_norm": 0.06449007987976074,
"learning_rate": 6.862745098039216e-05,
"loss": 0.056,
"step": 36
},
{
"epoch": 0.41226993865030676,
"grad_norm": 0.06695199757814407,
"learning_rate": 8.039215686274511e-05,
"loss": 0.0378,
"step": 42
},
{
"epoch": 0.47116564417177914,
"grad_norm": 0.057652052491903305,
"learning_rate": 9.215686274509804e-05,
"loss": 0.0333,
"step": 48
},
{
"epoch": 0.5006134969325153,
"eval_loss": 0.04454955831170082,
"eval_runtime": 99.1976,
"eval_samples_per_second": 0.917,
"eval_steps_per_second": 0.917,
"step": 51
},
{
"epoch": 0.5300613496932516,
"grad_norm": 0.07416237890720367,
"learning_rate": 9.99952117026961e-05,
"loss": 0.0317,
"step": 54
},
{
"epoch": 0.588957055214724,
"grad_norm": 0.0759715884923935,
"learning_rate": 9.992340558396519e-05,
"loss": 0.0333,
"step": 60
},
{
"epoch": 0.6478527607361964,
"grad_norm": 0.06889687478542328,
"learning_rate": 9.976555313435489e-05,
"loss": 0.0265,
"step": 66
},
{
"epoch": 0.7067484662576687,
"grad_norm": 0.06610561907291412,
"learning_rate": 9.952192642312712e-05,
"loss": 0.0267,
"step": 72
},
{
"epoch": 0.7656441717791411,
"grad_norm": 0.05757423862814903,
"learning_rate": 9.919294535722452e-05,
"loss": 0.0276,
"step": 78
},
{
"epoch": 0.8245398773006135,
"grad_norm": 0.05512451380491257,
"learning_rate": 9.877917695753274e-05,
"loss": 0.0224,
"step": 84
},
{
"epoch": 0.8834355828220859,
"grad_norm": 0.06681577116250992,
"learning_rate": 9.828133438158205e-05,
"loss": 0.02,
"step": 90
},
{
"epoch": 0.9423312883435583,
"grad_norm": 0.07138796895742416,
"learning_rate": 9.770027569437253e-05,
"loss": 0.0263,
"step": 96
},
{
"epoch": 1.0,
"grad_norm": 0.06135529652237892,
"learning_rate": 9.703700238944158e-05,
"loss": 0.0189,
"step": 102
},
{
"epoch": 1.0,
"eval_loss": 0.027291039004921913,
"eval_runtime": 90.02,
"eval_samples_per_second": 1.011,
"eval_steps_per_second": 1.011,
"step": 102
},
{
"epoch": 1.0588957055214725,
"grad_norm": 0.09823279082775116,
"learning_rate": 9.629265766272292e-05,
"loss": 0.0158,
"step": 108
},
{
"epoch": 1.1177914110429448,
"grad_norm": 0.05707687512040138,
"learning_rate": 9.54685244421718e-05,
"loss": 0.017,
"step": 114
},
{
"epoch": 1.1766871165644173,
"grad_norm": 0.06987980008125305,
"learning_rate": 9.456602317655275e-05,
"loss": 0.0169,
"step": 120
},
{
"epoch": 1.2355828220858895,
"grad_norm": 0.06296130269765854,
"learning_rate": 9.358670938720113e-05,
"loss": 0.0162,
"step": 126
},
{
"epoch": 1.294478527607362,
"grad_norm": 0.059489328414201736,
"learning_rate": 9.253227098697803e-05,
"loss": 0.0166,
"step": 132
},
{
"epoch": 1.3533742331288343,
"grad_norm": 0.06373760849237442,
"learning_rate": 9.140452537103942e-05,
"loss": 0.0188,
"step": 138
},
{
"epoch": 1.4122699386503068,
"grad_norm": 0.06238365173339844,
"learning_rate": 9.020541628443395e-05,
"loss": 0.0161,
"step": 144
},
{
"epoch": 1.471165644171779,
"grad_norm": 0.071172334253788,
"learning_rate": 8.893701047192833e-05,
"loss": 0.0196,
"step": 150
},
{
"epoch": 1.5006134969325153,
"eval_loss": 0.024013910442590714,
"eval_runtime": 90.4486,
"eval_samples_per_second": 1.006,
"eval_steps_per_second": 1.006,
"step": 153
}
],
"logging_steps": 6,
"max_steps": 505,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 51,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.87590411229184e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}