lesso12's picture
Training in progress, step 200, checkpoint
526773b verified
{
"best_metric": 11.5,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.11668611435239207,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005834305717619603,
"eval_loss": 11.5,
"eval_runtime": 3.9094,
"eval_samples_per_second": 184.681,
"eval_steps_per_second": 46.298,
"step": 1
},
{
"epoch": 0.005834305717619603,
"grad_norm": 4.12616936955601e-05,
"learning_rate": 4.24e-05,
"loss": 23.0,
"step": 10
},
{
"epoch": 0.011668611435239206,
"grad_norm": 2.467960075591691e-05,
"learning_rate": 8.48e-05,
"loss": 23.0,
"step": 20
},
{
"epoch": 0.01750291715285881,
"grad_norm": 3.497821671771817e-05,
"learning_rate": 0.0001272,
"loss": 23.0,
"step": 30
},
{
"epoch": 0.023337222870478413,
"grad_norm": 5.835205956827849e-05,
"learning_rate": 0.0001696,
"loss": 23.0,
"step": 40
},
{
"epoch": 0.029171528588098017,
"grad_norm": 7.785590423736721e-05,
"learning_rate": 0.000212,
"loss": 23.0,
"step": 50
},
{
"epoch": 0.029171528588098017,
"eval_loss": 11.5,
"eval_runtime": 3.9019,
"eval_samples_per_second": 185.037,
"eval_steps_per_second": 46.387,
"step": 50
},
{
"epoch": 0.03500583430571762,
"grad_norm": 2.9545175493694842e-05,
"learning_rate": 0.00021174178932754136,
"loss": 23.0,
"step": 60
},
{
"epoch": 0.040840140023337225,
"grad_norm": 7.258558616740629e-05,
"learning_rate": 0.00021096841528660647,
"loss": 23.0,
"step": 70
},
{
"epoch": 0.046674445740956826,
"grad_norm": 6.44875253783539e-05,
"learning_rate": 0.0002096836456777834,
"loss": 23.0,
"step": 80
},
{
"epoch": 0.052508751458576426,
"grad_norm": 8.886439900379628e-05,
"learning_rate": 0.00020789373976946182,
"loss": 23.0,
"step": 90
},
{
"epoch": 0.058343057176196034,
"grad_norm": 0.00012685952242463827,
"learning_rate": 0.0002056074178033063,
"loss": 23.0,
"step": 100
},
{
"epoch": 0.058343057176196034,
"eval_loss": 11.5,
"eval_runtime": 4.0392,
"eval_samples_per_second": 178.75,
"eval_steps_per_second": 44.811,
"step": 100
},
{
"epoch": 0.06417736289381563,
"grad_norm": 7.025956438155845e-05,
"learning_rate": 0.00020283581851011567,
"loss": 23.0,
"step": 110
},
{
"epoch": 0.07001166861143523,
"grad_norm": 0.00010229845065623522,
"learning_rate": 0.00019959244484304625,
"loss": 23.0,
"step": 120
},
{
"epoch": 0.07584597432905485,
"grad_norm": 0.0003530419198796153,
"learning_rate": 0.00019589309819258114,
"loss": 23.0,
"step": 130
},
{
"epoch": 0.08168028004667445,
"grad_norm": 0.00015672302106395364,
"learning_rate": 0.00019175580140374444,
"loss": 23.0,
"step": 140
},
{
"epoch": 0.08751458576429405,
"grad_norm": 0.00022062881907913834,
"learning_rate": 0.00018720071097061167,
"loss": 23.0,
"step": 150
},
{
"epoch": 0.08751458576429405,
"eval_loss": 11.5,
"eval_runtime": 3.966,
"eval_samples_per_second": 182.046,
"eval_steps_per_second": 45.638,
"step": 150
},
{
"epoch": 0.09334889148191365,
"grad_norm": 0.00015543992049060762,
"learning_rate": 0.00018225001883589702,
"loss": 23.0,
"step": 160
},
{
"epoch": 0.09918319719953325,
"grad_norm": 0.00040469339000992477,
"learning_rate": 0.00017692784427403898,
"loss": 23.0,
"step": 170
},
{
"epoch": 0.10501750291715285,
"grad_norm": 0.0006950330571271479,
"learning_rate": 0.00017126011638451976,
"loss": 23.0,
"step": 180
},
{
"epoch": 0.11085180863477247,
"grad_norm": 0.0004899466875940561,
"learning_rate": 0.00016527444776789915,
"loss": 23.0,
"step": 190
},
{
"epoch": 0.11668611435239207,
"grad_norm": 0.0006722796242684126,
"learning_rate": 0.00015900000000000002,
"loss": 23.0,
"step": 200
},
{
"epoch": 0.11668611435239207,
"eval_loss": 11.5,
"eval_runtime": 3.9865,
"eval_samples_per_second": 181.113,
"eval_steps_per_second": 45.404,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3999582879744.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}