ares / last-checkpoint /trainer_state.json

Training in progress, step 24, checkpoint

68149c6 verified 11 months ago

4.96 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.002324842210416383,
	"eval_steps": 500,
	"global_step": 24,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 9.686842543401596e-05,
	"grad_norm": 0.4089602828025818,
	"learning_rate": 1.9230769230769234e-06,
	"loss": 2.0398,
	"step": 1
	},
	{
	"epoch": 0.00019373685086803192,
	"grad_norm": 0.4970704913139343,
	"learning_rate": 3.846153846153847e-06,
	"loss": 2.0532,
	"step": 2
	},
	{
	"epoch": 0.00029060527630204785,
	"grad_norm": 0.4048117995262146,
	"learning_rate": 5.76923076923077e-06,
	"loss": 2.1092,
	"step": 3
	},
	{
	"epoch": 0.00038747370173606384,
	"grad_norm": 0.41647177934646606,
	"learning_rate": 7.692307692307694e-06,
	"loss": 2.1701,
	"step": 4
	},
	{
	"epoch": 0.00048434212717007977,
	"grad_norm": 0.33691734075546265,
	"learning_rate": 9.615384615384616e-06,
	"loss": 2.0658,
	"step": 5
	},
	{
	"epoch": 0.0005812105526040957,
	"grad_norm": 0.40251973271369934,
	"learning_rate": 1.153846153846154e-05,
	"loss": 2.0623,
	"step": 6
	},
	{
	"epoch": 0.0006780789780381117,
	"grad_norm": 0.32849112153053284,
	"learning_rate": 1.3461538461538462e-05,
	"loss": 2.0855,
	"step": 7
	},
	{
	"epoch": 0.0007749474034721277,
	"grad_norm": 0.29179856181144714,
	"learning_rate": 1.5384615384615387e-05,
	"loss": 1.9376,
	"step": 8
	},
	{
	"epoch": 0.0008718158289061436,
	"grad_norm": 0.34249696135520935,
	"learning_rate": 1.730769230769231e-05,
	"loss": 2.0817,
	"step": 9
	},
	{
	"epoch": 0.0009686842543401595,
	"grad_norm": 0.25032880902290344,
	"learning_rate": 1.923076923076923e-05,
	"loss": 2.0218,
	"step": 10
	},
	{
	"epoch": 0.0010655526797741755,
	"grad_norm": 0.2710218131542206,
	"learning_rate": 2.1153846153846154e-05,
	"loss": 2.0107,
	"step": 11
	},
	{
	"epoch": 0.0011624211052081914,
	"grad_norm": 0.2562181055545807,
	"learning_rate": 2.307692307692308e-05,
	"loss": 1.8556,
	"step": 12
	},
	{
	"epoch": 0.0012592895306422073,
	"grad_norm": 0.15152186155319214,
	"learning_rate": 2.5e-05,
	"loss": 1.938,
	"step": 13
	},
	{
	"epoch": 0.0013561579560762234,
	"grad_norm": 0.19589252769947052,
	"learning_rate": 2.6923076923076923e-05,
	"loss": 1.9859,
	"step": 14
	},
	{
	"epoch": 0.0014530263815102393,
	"grad_norm": 0.17176692187786102,
	"learning_rate": 2.8846153846153845e-05,
	"loss": 1.8812,
	"step": 15
	},
	{
	"epoch": 0.0015498948069442554,
	"grad_norm": 0.18412314355373383,
	"learning_rate": 3.0769230769230774e-05,
	"loss": 2.0042,
	"step": 16
	},
	{
	"epoch": 0.0016467632323782712,
	"grad_norm": 0.19615541398525238,
	"learning_rate": 3.269230769230769e-05,
	"loss": 1.9725,
	"step": 17
	},
	{
	"epoch": 0.001743631657812287,
	"grad_norm": 0.18842636048793793,
	"learning_rate": 3.461538461538462e-05,
	"loss": 1.9555,
	"step": 18
	},
	{
	"epoch": 0.0018405000832463032,
	"grad_norm": 0.23347383737564087,
	"learning_rate": 3.653846153846154e-05,
	"loss": 1.9779,
	"step": 19
	},
	{
	"epoch": 0.001937368508680319,
	"grad_norm": 0.19372476637363434,
	"learning_rate": 3.846153846153846e-05,
	"loss": 1.9203,
	"step": 20
	},
	{
	"epoch": 0.002034236934114335,
	"grad_norm": 0.15928150713443756,
	"learning_rate": 4.038461538461539e-05,
	"loss": 1.8459,
	"step": 21
	},
	{
	"epoch": 0.002131105359548351,
	"grad_norm": 0.18540354073047638,
	"learning_rate": 4.230769230769231e-05,
	"loss": 1.7937,
	"step": 22
	},
	{
	"epoch": 0.0022279737849823667,
	"grad_norm": 0.1321619600057602,
	"learning_rate": 4.423076923076923e-05,
	"loss": 1.8011,
	"step": 23
	},
	{
	"epoch": 0.002324842210416383,
	"grad_norm": 0.1503838449716568,
	"learning_rate": 4.615384615384616e-05,
	"loss": 1.8784,
	"step": 24
	}
	],
	"logging_steps": 1.0,
	"max_steps": 10323,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 4,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 8301560403240960.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}