{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9412650602409639,
"eval_steps": 500,
"global_step": 625,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03765060240963856,
"grad_norm": 0.29182711243629456,
"learning_rate": 0.0002,
"loss": 2.048,
"step": 25
},
{
"epoch": 0.07530120481927711,
"grad_norm": 0.355770081281662,
"learning_rate": 0.0002,
"loss": 1.9254,
"step": 50
},
{
"epoch": 0.11295180722891567,
"grad_norm": 0.24575483798980713,
"learning_rate": 0.0002,
"loss": 1.8281,
"step": 75
},
{
"epoch": 0.15060240963855423,
"grad_norm": 0.370360791683197,
"learning_rate": 0.0002,
"loss": 1.8354,
"step": 100
},
{
"epoch": 0.18825301204819278,
"grad_norm": 0.30005237460136414,
"learning_rate": 0.0002,
"loss": 1.8066,
"step": 125
},
{
"epoch": 0.22590361445783133,
"grad_norm": 0.4004781246185303,
"learning_rate": 0.0002,
"loss": 2.0024,
"step": 150
},
{
"epoch": 0.2635542168674699,
"grad_norm": 0.37836953997612,
"learning_rate": 0.0002,
"loss": 1.9717,
"step": 175
},
{
"epoch": 0.30120481927710846,
"grad_norm": 0.38621386885643005,
"learning_rate": 0.0002,
"loss": 2.0938,
"step": 200
},
{
"epoch": 0.338855421686747,
"grad_norm": 0.32652556896209717,
"learning_rate": 0.0002,
"loss": 1.9128,
"step": 225
},
{
"epoch": 0.37650602409638556,
"grad_norm": 0.3783712387084961,
"learning_rate": 0.0002,
"loss": 1.7907,
"step": 250
},
{
"epoch": 0.4141566265060241,
"grad_norm": 0.2575533390045166,
"learning_rate": 0.0002,
"loss": 1.8501,
"step": 275
},
{
"epoch": 0.45180722891566266,
"grad_norm": 0.37206143140792847,
"learning_rate": 0.0002,
"loss": 1.7147,
"step": 300
},
{
"epoch": 0.4894578313253012,
"grad_norm": 0.3018392324447632,
"learning_rate": 0.0002,
"loss": 1.602,
"step": 325
},
{
"epoch": 0.5271084337349398,
"grad_norm": 0.45196229219436646,
"learning_rate": 0.0002,
"loss": 1.8455,
"step": 350
},
{
"epoch": 0.5647590361445783,
"grad_norm": 0.250590443611145,
"learning_rate": 0.0002,
"loss": 1.9206,
"step": 375
},
{
"epoch": 0.6024096385542169,
"grad_norm": 0.314155638217926,
"learning_rate": 0.0002,
"loss": 1.908,
"step": 400
},
{
"epoch": 0.6400602409638554,
"grad_norm": 0.319771409034729,
"learning_rate": 0.0002,
"loss": 1.6045,
"step": 425
},
{
"epoch": 0.677710843373494,
"grad_norm": 0.4088861346244812,
"learning_rate": 0.0002,
"loss": 1.7106,
"step": 450
},
{
"epoch": 0.7153614457831325,
"grad_norm": 0.29917973279953003,
"learning_rate": 0.0002,
"loss": 1.7349,
"step": 475
},
{
"epoch": 0.7530120481927711,
"grad_norm": 0.6233075857162476,
"learning_rate": 0.0002,
"loss": 1.9191,
"step": 500
},
{
"epoch": 0.7906626506024096,
"grad_norm": 0.24983790516853333,
"learning_rate": 0.0002,
"loss": 1.6759,
"step": 525
},
{
"epoch": 0.8283132530120482,
"grad_norm": 0.42871519923210144,
"learning_rate": 0.0002,
"loss": 1.7982,
"step": 550
},
{
"epoch": 0.8659638554216867,
"grad_norm": 0.3003176748752594,
"learning_rate": 0.0002,
"loss": 1.7154,
"step": 575
},
{
"epoch": 0.9036144578313253,
"grad_norm": 0.4300028681755066,
"learning_rate": 0.0002,
"loss": 1.6376,
"step": 600
},
{
"epoch": 0.9412650602409639,
"grad_norm": 0.38282954692840576,
"learning_rate": 0.0002,
"loss": 1.545,
"step": 625
}
],
"logging_steps": 25,
"max_steps": 664,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.109390150045696e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}