{ "best_metric": 0.25741004943847656, "best_model_checkpoint": "bert_uncased_L-4_H-512_A-8_qqp/checkpoint-4266", "epoch": 8.0, "eval_steps": 500, "global_step": 11376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.9684581756591797, "learning_rate": 4.9e-05, "loss": 0.3551, "step": 1422 }, { "epoch": 1.0, "eval_accuracy": 0.86928023744744, "eval_combined_score": 0.8435916399310384, "eval_f1": 0.8179030424146366, "eval_loss": 0.2993009090423584, "eval_runtime": 16.3511, "eval_samples_per_second": 2472.617, "eval_steps_per_second": 9.663, "step": 1422 }, { "epoch": 2.0, "grad_norm": 4.823949813842773, "learning_rate": 4.8e-05, "loss": 0.2662, "step": 2844 }, { "epoch": 2.0, "eval_accuracy": 0.8841701706653475, "eval_combined_score": 0.8667011633726596, "eval_f1": 0.8492321560799717, "eval_loss": 0.2662528157234192, "eval_runtime": 16.3577, "eval_samples_per_second": 2471.626, "eval_steps_per_second": 9.659, "step": 2844 }, { "epoch": 3.0, "grad_norm": 3.9237613677978516, "learning_rate": 4.7e-05, "loss": 0.217, "step": 4266 }, { "epoch": 3.0, "eval_accuracy": 0.8914172644076181, "eval_combined_score": 0.8726734289030837, "eval_f1": 0.8539295933985492, "eval_loss": 0.25741004943847656, "eval_runtime": 15.9668, "eval_samples_per_second": 2532.133, "eval_steps_per_second": 9.896, "step": 4266 }, { "epoch": 4.0, "grad_norm": 4.0418925285339355, "learning_rate": 4.600000000000001e-05, "loss": 0.179, "step": 5688 }, { "epoch": 4.0, "eval_accuracy": 0.8936928023744743, "eval_combined_score": 0.8743674804452473, "eval_f1": 0.8550421585160203, "eval_loss": 0.26455405354499817, "eval_runtime": 16.0198, "eval_samples_per_second": 2523.748, "eval_steps_per_second": 9.863, "step": 5688 }, { "epoch": 5.0, "grad_norm": 3.7040185928344727, "learning_rate": 4.5e-05, "loss": 0.1487, "step": 7110 }, { "epoch": 5.0, "eval_accuracy": 0.8937917388078159, "eval_combined_score": 0.8760050287514614, "eval_f1": 0.8582183186951067, "eval_loss": 0.291958212852478, "eval_runtime": 16.0229, "eval_samples_per_second": 2523.268, "eval_steps_per_second": 9.861, "step": 7110 }, { "epoch": 6.0, "grad_norm": 4.520602226257324, "learning_rate": 4.4000000000000006e-05, "loss": 0.1228, "step": 8532 }, { "epoch": 6.0, "eval_accuracy": 0.8936433341578036, "eval_combined_score": 0.8761816355373063, "eval_f1": 0.858719936916809, "eval_loss": 0.29707542061805725, "eval_runtime": 16.029, "eval_samples_per_second": 2522.304, "eval_steps_per_second": 9.857, "step": 8532 }, { "epoch": 7.0, "grad_norm": 3.891303062438965, "learning_rate": 4.3e-05, "loss": 0.1042, "step": 9954 }, { "epoch": 7.0, "eval_accuracy": 0.8915904031659658, "eval_combined_score": 0.8746264294594484, "eval_f1": 0.8576624557529309, "eval_loss": 0.3390096127986908, "eval_runtime": 16.0668, "eval_samples_per_second": 2516.367, "eval_steps_per_second": 9.834, "step": 9954 }, { "epoch": 8.0, "grad_norm": 5.342499256134033, "learning_rate": 4.2e-05, "loss": 0.0882, "step": 11376 }, { "epoch": 8.0, "eval_accuracy": 0.8930744496660895, "eval_combined_score": 0.8767506365141864, "eval_f1": 0.8604268233622833, "eval_loss": 0.3567749857902527, "eval_runtime": 16.0666, "eval_samples_per_second": 2516.407, "eval_steps_per_second": 9.834, "step": 11376 }, { "epoch": 8.0, "step": 11376, "total_flos": 5.755995899574682e+16, "train_loss": 0.18514271359235762, "train_runtime": 1915.5632, "train_samples_per_second": 9497.103, "train_steps_per_second": 37.117 } ], "logging_steps": 1, "max_steps": 71100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.755995899574682e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }