Training in progress, step 200, checkpoint

526773b verified about 2 months ago

5.46 kB

	{
	"best_metric": 11.5,
	"best_model_checkpoint": "miner_id_24/checkpoint-50",
	"epoch": 0.11668611435239207,
	"eval_steps": 50,
	"global_step": 200,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0005834305717619603,
	"eval_loss": 11.5,
	"eval_runtime": 3.9094,
	"eval_samples_per_second": 184.681,
	"eval_steps_per_second": 46.298,
	"step": 1
	},
	{
	"epoch": 0.005834305717619603,
	"grad_norm": 4.12616936955601e-05,
	"learning_rate": 4.24e-05,
	"loss": 23.0,
	"step": 10
	},
	{
	"epoch": 0.011668611435239206,
	"grad_norm": 2.467960075591691e-05,
	"learning_rate": 8.48e-05,
	"loss": 23.0,
	"step": 20
	},
	{
	"epoch": 0.01750291715285881,
	"grad_norm": 3.497821671771817e-05,
	"learning_rate": 0.0001272,
	"loss": 23.0,
	"step": 30
	},
	{
	"epoch": 0.023337222870478413,
	"grad_norm": 5.835205956827849e-05,
	"learning_rate": 0.0001696,
	"loss": 23.0,
	"step": 40
	},
	{
	"epoch": 0.029171528588098017,
	"grad_norm": 7.785590423736721e-05,
	"learning_rate": 0.000212,
	"loss": 23.0,
	"step": 50
	},
	{
	"epoch": 0.029171528588098017,
	"eval_loss": 11.5,
	"eval_runtime": 3.9019,
	"eval_samples_per_second": 185.037,
	"eval_steps_per_second": 46.387,
	"step": 50
	},
	{
	"epoch": 0.03500583430571762,
	"grad_norm": 2.9545175493694842e-05,
	"learning_rate": 0.00021174178932754136,
	"loss": 23.0,
	"step": 60
	},
	{
	"epoch": 0.040840140023337225,
	"grad_norm": 7.258558616740629e-05,
	"learning_rate": 0.00021096841528660647,
	"loss": 23.0,
	"step": 70
	},
	{
	"epoch": 0.046674445740956826,
	"grad_norm": 6.44875253783539e-05,
	"learning_rate": 0.0002096836456777834,
	"loss": 23.0,
	"step": 80
	},
	{
	"epoch": 0.052508751458576426,
	"grad_norm": 8.886439900379628e-05,
	"learning_rate": 0.00020789373976946182,
	"loss": 23.0,
	"step": 90
	},
	{
	"epoch": 0.058343057176196034,
	"grad_norm": 0.00012685952242463827,
	"learning_rate": 0.0002056074178033063,
	"loss": 23.0,
	"step": 100
	},
	{
	"epoch": 0.058343057176196034,
	"eval_loss": 11.5,
	"eval_runtime": 4.0392,
	"eval_samples_per_second": 178.75,
	"eval_steps_per_second": 44.811,
	"step": 100
	},
	{
	"epoch": 0.06417736289381563,
	"grad_norm": 7.025956438155845e-05,
	"learning_rate": 0.00020283581851011567,
	"loss": 23.0,
	"step": 110
	},
	{
	"epoch": 0.07001166861143523,
	"grad_norm": 0.00010229845065623522,
	"learning_rate": 0.00019959244484304625,
	"loss": 23.0,
	"step": 120
	},
	{
	"epoch": 0.07584597432905485,
	"grad_norm": 0.0003530419198796153,
	"learning_rate": 0.00019589309819258114,
	"loss": 23.0,
	"step": 130
	},
	{
	"epoch": 0.08168028004667445,
	"grad_norm": 0.00015672302106395364,
	"learning_rate": 0.00019175580140374444,
	"loss": 23.0,
	"step": 140
	},
	{
	"epoch": 0.08751458576429405,
	"grad_norm": 0.00022062881907913834,
	"learning_rate": 0.00018720071097061167,
	"loss": 23.0,
	"step": 150
	},
	{
	"epoch": 0.08751458576429405,
	"eval_loss": 11.5,
	"eval_runtime": 3.966,
	"eval_samples_per_second": 182.046,
	"eval_steps_per_second": 45.638,
	"step": 150
	},
	{
	"epoch": 0.09334889148191365,
	"grad_norm": 0.00015543992049060762,
	"learning_rate": 0.00018225001883589702,
	"loss": 23.0,
	"step": 160
	},
	{
	"epoch": 0.09918319719953325,
	"grad_norm": 0.00040469339000992477,
	"learning_rate": 0.00017692784427403898,
	"loss": 23.0,
	"step": 170
	},
	{
	"epoch": 0.10501750291715285,
	"grad_norm": 0.0006950330571271479,
	"learning_rate": 0.00017126011638451976,
	"loss": 23.0,
	"step": 180
	},
	{
	"epoch": 0.11085180863477247,
	"grad_norm": 0.0004899466875940561,
	"learning_rate": 0.00016527444776789915,
	"loss": 23.0,
	"step": 190
	},
	{
	"epoch": 0.11668611435239207,
	"grad_norm": 0.0006722796242684126,
	"learning_rate": 0.00015900000000000002,
	"loss": 23.0,
	"step": 200
	},
	{
	"epoch": 0.11668611435239207,
	"eval_loss": 11.5,
	"eval_runtime": 3.9865,
	"eval_samples_per_second": 181.113,
	"eval_steps_per_second": 45.404,
	"step": 200
	}
	],
	"logging_steps": 10,
	"max_steps": 500,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 50,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 3,
	"early_stopping_threshold": 0.0
	},
	"attributes": {
	"early_stopping_patience_counter": 3
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3999582879744.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}