|
{ |
|
"best_global_step": 170, |
|
"best_metric": 0.6184515357017517, |
|
"best_model_checkpoint": "bert_base_train_book_ent_15p_s_init_cola/checkpoint-170", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1766574382781982, |
|
"learning_rate": 4.9029411764705883e-05, |
|
"loss": 0.6207, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6185039281845093, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.8903, |
|
"eval_samples_per_second": 1171.53, |
|
"eval_steps_per_second": 5.616, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.5290485620498657, |
|
"learning_rate": 4.8029411764705886e-05, |
|
"loss": 0.6093, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6191949248313904, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.8909, |
|
"eval_samples_per_second": 1170.701, |
|
"eval_steps_per_second": 5.612, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.8824849128723145, |
|
"learning_rate": 4.702941176470588e-05, |
|
"loss": 0.61, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6199177503585815, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.9094, |
|
"eval_samples_per_second": 1146.898, |
|
"eval_steps_per_second": 5.498, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.128862738609314, |
|
"learning_rate": 4.6029411764705885e-05, |
|
"loss": 0.6103, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.618511438369751, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.904, |
|
"eval_samples_per_second": 1153.724, |
|
"eval_steps_per_second": 5.531, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.0845144987106323, |
|
"learning_rate": 4.502941176470589e-05, |
|
"loss": 0.6113, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6184515357017517, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.8926, |
|
"eval_samples_per_second": 1168.527, |
|
"eval_steps_per_second": 5.602, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.6451035737991333, |
|
"learning_rate": 4.4029411764705884e-05, |
|
"loss": 0.6094, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6199439764022827, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.8889, |
|
"eval_samples_per_second": 1173.306, |
|
"eval_steps_per_second": 5.625, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.6885813474655151, |
|
"learning_rate": 4.302941176470589e-05, |
|
"loss": 0.6098, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6215131878852844, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.889, |
|
"eval_samples_per_second": 1173.173, |
|
"eval_steps_per_second": 5.624, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.6558329463005066, |
|
"learning_rate": 4.202941176470588e-05, |
|
"loss": 0.6113, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6247884035110474, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.8876, |
|
"eval_samples_per_second": 1175.115, |
|
"eval_steps_per_second": 5.633, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.49616092443466187, |
|
"learning_rate": 4.1029411764705886e-05, |
|
"loss": 0.6098, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6285766959190369, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.9282, |
|
"eval_samples_per_second": 1123.701, |
|
"eval_steps_per_second": 5.387, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.553864061832428, |
|
"learning_rate": 4.002941176470588e-05, |
|
"loss": 0.6106, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6254100799560547, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.8992, |
|
"eval_samples_per_second": 1159.91, |
|
"eval_steps_per_second": 5.56, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 340, |
|
"total_flos": 1.12493131719168e+16, |
|
"train_loss": 0.6112596624037798, |
|
"train_runtime": 186.9033, |
|
"train_samples_per_second": 2287.547, |
|
"train_steps_per_second": 9.096 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.12493131719168e+16, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|