qwen1.5B_500test_lora / trainer_state.json

xzc2002

Upload folder using huggingface_hub

c323f55 verified 3 months ago

4.9 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.97196261682243,
	"eval_steps": 500,
	"global_step": 240,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.12461059190031153,
	"grad_norm": 0.9156144261360168,
	"learning_rate": 0.00019250000000000002,
	"loss": 0.3465,
	"step": 10
	},
	{
	"epoch": 0.24922118380062305,
	"grad_norm": 0.8195157647132874,
	"learning_rate": 0.00018416666666666665,
	"loss": 0.329,
	"step": 20
	},
	{
	"epoch": 0.37383177570093457,
	"grad_norm": 0.6960992813110352,
	"learning_rate": 0.00017583333333333334,
	"loss": 0.2746,
	"step": 30
	},
	{
	"epoch": 0.4984423676012461,
	"grad_norm": 0.6025754809379578,
	"learning_rate": 0.0001675,
	"loss": 0.311,
	"step": 40
	},
	{
	"epoch": 0.6230529595015576,
	"grad_norm": 0.8380621075630188,
	"learning_rate": 0.00015916666666666667,
	"loss": 0.324,
	"step": 50
	},
	{
	"epoch": 0.7476635514018691,
	"grad_norm": 0.7013344764709473,
	"learning_rate": 0.00015083333333333333,
	"loss": 0.3203,
	"step": 60
	},
	{
	"epoch": 0.8722741433021807,
	"grad_norm": 0.7946021556854248,
	"learning_rate": 0.00014250000000000002,
	"loss": 0.3048,
	"step": 70
	},
	{
	"epoch": 0.9968847352024922,
	"grad_norm": 0.7311714887619019,
	"learning_rate": 0.00013416666666666666,
	"loss": 0.2809,
	"step": 80
	},
	{
	"epoch": 1.1121495327102804,
	"grad_norm": 0.8304562568664551,
	"learning_rate": 0.00012583333333333335,
	"loss": 0.2608,
	"step": 90
	},
	{
	"epoch": 1.236760124610592,
	"grad_norm": 0.6632652878761292,
	"learning_rate": 0.00011750000000000001,
	"loss": 0.2839,
	"step": 100
	},
	{
	"epoch": 1.3613707165109035,
	"grad_norm": 0.6893765330314636,
	"learning_rate": 0.00010916666666666666,
	"loss": 0.2732,
	"step": 110
	},
	{
	"epoch": 1.485981308411215,
	"grad_norm": 0.7527514100074768,
	"learning_rate": 0.00010083333333333334,
	"loss": 0.2954,
	"step": 120
	},
	{
	"epoch": 1.6105919003115265,
	"grad_norm": 0.6240414977073669,
	"learning_rate": 9.250000000000001e-05,
	"loss": 0.2624,
	"step": 130
	},
	{
	"epoch": 1.735202492211838,
	"grad_norm": 0.7276539206504822,
	"learning_rate": 8.416666666666668e-05,
	"loss": 0.2713,
	"step": 140
	},
	{
	"epoch": 1.8598130841121496,
	"grad_norm": 0.7341501712799072,
	"learning_rate": 7.583333333333334e-05,
	"loss": 0.2519,
	"step": 150
	},
	{
	"epoch": 1.9844236760124612,
	"grad_norm": 0.8342993259429932,
	"learning_rate": 6.750000000000001e-05,
	"loss": 0.2686,
	"step": 160
	},
	{
	"epoch": 2.0996884735202492,
	"grad_norm": 0.6449198126792908,
	"learning_rate": 5.916666666666667e-05,
	"loss": 0.2368,
	"step": 170
	},
	{
	"epoch": 2.2242990654205608,
	"grad_norm": 0.5292518734931946,
	"learning_rate": 5.0833333333333333e-05,
	"loss": 0.2497,
	"step": 180
	},
	{
	"epoch": 2.3489096573208723,
	"grad_norm": 0.7724623084068298,
	"learning_rate": 4.25e-05,
	"loss": 0.2412,
	"step": 190
	},
	{
	"epoch": 2.473520249221184,
	"grad_norm": 0.7042115330696106,
	"learning_rate": 3.4166666666666666e-05,
	"loss": 0.2488,
	"step": 200
	},
	{
	"epoch": 2.5981308411214954,
	"grad_norm": 0.675959050655365,
	"learning_rate": 2.5833333333333336e-05,
	"loss": 0.2772,
	"step": 210
	},
	{
	"epoch": 2.722741433021807,
	"grad_norm": 0.6327322721481323,
	"learning_rate": 1.75e-05,
	"loss": 0.2684,
	"step": 220
	},
	{
	"epoch": 2.8473520249221185,
	"grad_norm": 0.4853314757347107,
	"learning_rate": 9.166666666666666e-06,
	"loss": 0.2581,
	"step": 230
	},
	{
	"epoch": 2.97196261682243,
	"grad_norm": 0.7433005571365356,
	"learning_rate": 8.333333333333333e-07,
	"loss": 0.2624,
	"step": 240
	}
	],
	"logging_steps": 10,
	"max_steps": 240,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 30,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 4.596380699384218e+16,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}