{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.97196261682243,
"eval_steps": 500,
"global_step": 240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12461059190031153,
"grad_norm": 0.9156144261360168,
"learning_rate": 0.00019250000000000002,
"loss": 0.3465,
"step": 10
},
{
"epoch": 0.24922118380062305,
"grad_norm": 0.8195157647132874,
"learning_rate": 0.00018416666666666665,
"loss": 0.329,
"step": 20
},
{
"epoch": 0.37383177570093457,
"grad_norm": 0.6960992813110352,
"learning_rate": 0.00017583333333333334,
"loss": 0.2746,
"step": 30
},
{
"epoch": 0.4984423676012461,
"grad_norm": 0.6025754809379578,
"learning_rate": 0.0001675,
"loss": 0.311,
"step": 40
},
{
"epoch": 0.6230529595015576,
"grad_norm": 0.8380621075630188,
"learning_rate": 0.00015916666666666667,
"loss": 0.324,
"step": 50
},
{
"epoch": 0.7476635514018691,
"grad_norm": 0.7013344764709473,
"learning_rate": 0.00015083333333333333,
"loss": 0.3203,
"step": 60
},
{
"epoch": 0.8722741433021807,
"grad_norm": 0.7946021556854248,
"learning_rate": 0.00014250000000000002,
"loss": 0.3048,
"step": 70
},
{
"epoch": 0.9968847352024922,
"grad_norm": 0.7311714887619019,
"learning_rate": 0.00013416666666666666,
"loss": 0.2809,
"step": 80
},
{
"epoch": 1.1121495327102804,
"grad_norm": 0.8304562568664551,
"learning_rate": 0.00012583333333333335,
"loss": 0.2608,
"step": 90
},
{
"epoch": 1.236760124610592,
"grad_norm": 0.6632652878761292,
"learning_rate": 0.00011750000000000001,
"loss": 0.2839,
"step": 100
},
{
"epoch": 1.3613707165109035,
"grad_norm": 0.6893765330314636,
"learning_rate": 0.00010916666666666666,
"loss": 0.2732,
"step": 110
},
{
"epoch": 1.485981308411215,
"grad_norm": 0.7527514100074768,
"learning_rate": 0.00010083333333333334,
"loss": 0.2954,
"step": 120
},
{
"epoch": 1.6105919003115265,
"grad_norm": 0.6240414977073669,
"learning_rate": 9.250000000000001e-05,
"loss": 0.2624,
"step": 130
},
{
"epoch": 1.735202492211838,
"grad_norm": 0.7276539206504822,
"learning_rate": 8.416666666666668e-05,
"loss": 0.2713,
"step": 140
},
{
"epoch": 1.8598130841121496,
"grad_norm": 0.7341501712799072,
"learning_rate": 7.583333333333334e-05,
"loss": 0.2519,
"step": 150
},
{
"epoch": 1.9844236760124612,
"grad_norm": 0.8342993259429932,
"learning_rate": 6.750000000000001e-05,
"loss": 0.2686,
"step": 160
},
{
"epoch": 2.0996884735202492,
"grad_norm": 0.6449198126792908,
"learning_rate": 5.916666666666667e-05,
"loss": 0.2368,
"step": 170
},
{
"epoch": 2.2242990654205608,
"grad_norm": 0.5292518734931946,
"learning_rate": 5.0833333333333333e-05,
"loss": 0.2497,
"step": 180
},
{
"epoch": 2.3489096573208723,
"grad_norm": 0.7724623084068298,
"learning_rate": 4.25e-05,
"loss": 0.2412,
"step": 190
},
{
"epoch": 2.473520249221184,
"grad_norm": 0.7042115330696106,
"learning_rate": 3.4166666666666666e-05,
"loss": 0.2488,
"step": 200
},
{
"epoch": 2.5981308411214954,
"grad_norm": 0.675959050655365,
"learning_rate": 2.5833333333333336e-05,
"loss": 0.2772,
"step": 210
},
{
"epoch": 2.722741433021807,
"grad_norm": 0.6327322721481323,
"learning_rate": 1.75e-05,
"loss": 0.2684,
"step": 220
},
{
"epoch": 2.8473520249221185,
"grad_norm": 0.4853314757347107,
"learning_rate": 9.166666666666666e-06,
"loss": 0.2581,
"step": 230
},
{
"epoch": 2.97196261682243,
"grad_norm": 0.7433005571365356,
"learning_rate": 8.333333333333333e-07,
"loss": 0.2624,
"step": 240
}
],
"logging_steps": 10,
"max_steps": 240,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 30,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.596380699384218e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}