|
{ |
|
"best_global_step": 7670, |
|
"best_metric": 0.6811214685440063, |
|
"best_model_checkpoint": "tinybert_base_train_book_ent_15p_s_init_mnli/checkpoint-7670", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 15340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.6886646747589111, |
|
"learning_rate": 4.90006518904824e-05, |
|
"loss": 0.8785, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6660213958227204, |
|
"eval_loss": 0.7728874087333679, |
|
"eval_runtime": 5.2916, |
|
"eval_samples_per_second": 1854.821, |
|
"eval_steps_per_second": 7.37, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.5701472759246826, |
|
"learning_rate": 4.80006518904824e-05, |
|
"loss": 0.7353, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.691492613346918, |
|
"eval_loss": 0.7123497724533081, |
|
"eval_runtime": 5.2657, |
|
"eval_samples_per_second": 1863.958, |
|
"eval_steps_per_second": 7.406, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.9975154399871826, |
|
"learning_rate": 4.70006518904824e-05, |
|
"loss": 0.6658, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7072847682119205, |
|
"eval_loss": 0.6983441710472107, |
|
"eval_runtime": 5.2335, |
|
"eval_samples_per_second": 1875.435, |
|
"eval_steps_per_second": 7.452, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.1961188316345215, |
|
"learning_rate": 4.60006518904824e-05, |
|
"loss": 0.6113, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7168619460010188, |
|
"eval_loss": 0.700081467628479, |
|
"eval_runtime": 5.2018, |
|
"eval_samples_per_second": 1886.83, |
|
"eval_steps_per_second": 7.497, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.4144446849823, |
|
"learning_rate": 4.50006518904824e-05, |
|
"loss": 0.5654, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7245033112582782, |
|
"eval_loss": 0.6811214685440063, |
|
"eval_runtime": 5.2441, |
|
"eval_samples_per_second": 1871.624, |
|
"eval_steps_per_second": 7.437, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.4327688217163086, |
|
"learning_rate": 4.40006518904824e-05, |
|
"loss": 0.5207, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7257259296994396, |
|
"eval_loss": 0.7057485580444336, |
|
"eval_runtime": 5.236, |
|
"eval_samples_per_second": 1874.515, |
|
"eval_steps_per_second": 7.448, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.349059581756592, |
|
"learning_rate": 4.30006518904824e-05, |
|
"loss": 0.4798, |
|
"step": 10738 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7290881304126338, |
|
"eval_loss": 0.7188459038734436, |
|
"eval_runtime": 5.2738, |
|
"eval_samples_per_second": 1861.082, |
|
"eval_steps_per_second": 7.395, |
|
"step": 10738 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.1730854511260986, |
|
"learning_rate": 4.20006518904824e-05, |
|
"loss": 0.4403, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7230769230769231, |
|
"eval_loss": 0.7684288024902344, |
|
"eval_runtime": 5.2321, |
|
"eval_samples_per_second": 1875.918, |
|
"eval_steps_per_second": 7.454, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.504354953765869, |
|
"learning_rate": 4.10006518904824e-05, |
|
"loss": 0.4036, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7164544065206316, |
|
"eval_loss": 0.8033895492553711, |
|
"eval_runtime": 5.2638, |
|
"eval_samples_per_second": 1864.638, |
|
"eval_steps_per_second": 7.409, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.7538444995880127, |
|
"learning_rate": 4.00006518904824e-05, |
|
"loss": 0.3685, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7219561895058584, |
|
"eval_loss": 0.837572455406189, |
|
"eval_runtime": 5.2382, |
|
"eval_samples_per_second": 1873.732, |
|
"eval_steps_per_second": 7.445, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 15340, |
|
"total_flos": 7.76592697038336e+16, |
|
"train_loss": 0.5669185474301255, |
|
"train_runtime": 2952.0721, |
|
"train_samples_per_second": 6651.294, |
|
"train_steps_per_second": 25.982 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 76700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.76592697038336e+16, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|