{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 84,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17857142857142858,
      "grad_norm": 1.3622558116912842,
      "learning_rate": 0.00019825664732332884,
      "loss": 1.4496,
      "num_input_tokens_seen": 15664,
      "step": 5
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.8986290097236633,
      "learning_rate": 0.00019308737486442045,
      "loss": 1.051,
      "num_input_tokens_seen": 30528,
      "step": 10
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 1.14683997631073,
      "learning_rate": 0.00018467241992282843,
      "loss": 0.9827,
      "num_input_tokens_seen": 43392,
      "step": 15
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.8037276864051819,
      "learning_rate": 0.00017330518718298264,
      "loss": 1.0249,
      "num_input_tokens_seen": 60784,
      "step": 20
    },
    {
      "epoch": 0.8928571428571429,
      "grad_norm": 0.8258629441261292,
      "learning_rate": 0.00015938201855735014,
      "loss": 0.9651,
      "num_input_tokens_seen": 76080,
      "step": 25
    },
    {
      "epoch": 1.0714285714285714,
      "grad_norm": 0.9029279947280884,
      "learning_rate": 0.00014338837391175582,
      "loss": 0.9298,
      "num_input_tokens_seen": 90224,
      "step": 30
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.8744432926177979,
      "learning_rate": 0.00012588190451025207,
      "loss": 0.7694,
      "num_input_tokens_seen": 107072,
      "step": 35
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 1.3319528102874756,
      "learning_rate": 0.00010747300935864243,
      "loss": 0.6823,
      "num_input_tokens_seen": 123216,
      "step": 40
    },
    {
      "epoch": 1.6071428571428572,
      "grad_norm": 1.1126036643981934,
      "learning_rate": 8.880355238966923e-05,
      "loss": 0.5681,
      "num_input_tokens_seen": 138416,
      "step": 45
    },
    {
      "epoch": 1.7857142857142856,
      "grad_norm": 1.8243435621261597,
      "learning_rate": 7.052448255890957e-05,
      "loss": 0.6239,
      "num_input_tokens_seen": 152784,
      "step": 50
    },
    {
      "epoch": 1.9642857142857144,
      "grad_norm": 1.5611317157745361,
      "learning_rate": 5.32731371726938e-05,
      "loss": 0.6441,
      "num_input_tokens_seen": 167264,
      "step": 55
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 1.1920043230056763,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 0.5202,
      "num_input_tokens_seen": 181488,
      "step": 60
    },
    {
      "epoch": 2.3214285714285716,
      "grad_norm": 1.1858474016189575,
      "learning_rate": 2.420282768545469e-05,
      "loss": 0.443,
      "num_input_tokens_seen": 198512,
      "step": 65
    },
    {
      "epoch": 2.5,
      "grad_norm": 1.2865468263626099,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.4209,
      "num_input_tokens_seen": 213040,
      "step": 70
    },
    {
      "epoch": 2.678571428571429,
      "grad_norm": 1.2891031503677368,
      "learning_rate": 5.611666969163243e-06,
      "loss": 0.4818,
      "num_input_tokens_seen": 229312,
      "step": 75
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 1.2530392408370972,
      "learning_rate": 1.1169173774871478e-06,
      "loss": 0.5292,
      "num_input_tokens_seen": 244944,
      "step": 80
    }
  ],
  "logging_steps": 5,
  "max_steps": 84,
  "num_input_tokens_seen": 257136,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0939806209654784e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}