{ "best_metric": 0.3330647945404053, "best_model_checkpoint": "bert_uncased_L-4_H-512_A-8_sst2/checkpoint-792", "epoch": 8.0, "eval_steps": 500, "global_step": 2112, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.263431549072266, "learning_rate": 4.9e-05, "loss": 0.3288, "step": 264 }, { "epoch": 1.0, "eval_accuracy": 0.8600917431192661, "eval_loss": 0.3597773015499115, "eval_runtime": 0.3533, "eval_samples_per_second": 2468.422, "eval_steps_per_second": 11.323, "step": 264 }, { "epoch": 2.0, "grad_norm": 12.187209129333496, "learning_rate": 4.8e-05, "loss": 0.1917, "step": 528 }, { "epoch": 2.0, "eval_accuracy": 0.8830275229357798, "eval_loss": 0.3433310091495514, "eval_runtime": 0.3464, "eval_samples_per_second": 2517.025, "eval_steps_per_second": 11.546, "step": 528 }, { "epoch": 3.0, "grad_norm": 9.220148086547852, "learning_rate": 4.7e-05, "loss": 0.1386, "step": 792 }, { "epoch": 3.0, "eval_accuracy": 0.8899082568807339, "eval_loss": 0.3330647945404053, "eval_runtime": 0.352, "eval_samples_per_second": 2476.954, "eval_steps_per_second": 11.362, "step": 792 }, { "epoch": 4.0, "grad_norm": 7.551930904388428, "learning_rate": 4.600000000000001e-05, "loss": 0.108, "step": 1056 }, { "epoch": 4.0, "eval_accuracy": 0.8761467889908257, "eval_loss": 0.403239369392395, "eval_runtime": 0.3511, "eval_samples_per_second": 2483.355, "eval_steps_per_second": 11.392, "step": 1056 }, { "epoch": 5.0, "grad_norm": 5.391944408416748, "learning_rate": 4.5e-05, "loss": 0.0871, "step": 1320 }, { "epoch": 5.0, "eval_accuracy": 0.8669724770642202, "eval_loss": 0.448205828666687, "eval_runtime": 0.3509, "eval_samples_per_second": 2484.86, "eval_steps_per_second": 11.398, "step": 1320 }, { "epoch": 6.0, "grad_norm": 9.638006210327148, "learning_rate": 4.4000000000000006e-05, "loss": 0.0734, "step": 1584 }, { "epoch": 6.0, "eval_accuracy": 0.8795871559633027, "eval_loss": 0.3766608238220215, "eval_runtime": 0.3422, "eval_samples_per_second": 2548.356, "eval_steps_per_second": 11.69, "step": 1584 }, { "epoch": 7.0, "grad_norm": 3.6945831775665283, "learning_rate": 4.3e-05, "loss": 0.0592, "step": 1848 }, { "epoch": 7.0, "eval_accuracy": 0.8795871559633027, "eval_loss": 0.4376164376735687, "eval_runtime": 0.3423, "eval_samples_per_second": 2547.61, "eval_steps_per_second": 11.686, "step": 1848 }, { "epoch": 8.0, "grad_norm": 9.038667678833008, "learning_rate": 4.2e-05, "loss": 0.0509, "step": 2112 }, { "epoch": 8.0, "eval_accuracy": 0.8715596330275229, "eval_loss": 0.4948262870311737, "eval_runtime": 0.3501, "eval_samples_per_second": 2490.756, "eval_steps_per_second": 11.425, "step": 2112 }, { "epoch": 8.0, "step": 2112, "total_flos": 1.0654523282939904e+16, "train_loss": 0.12969460812481967, "train_runtime": 335.7916, "train_samples_per_second": 10028.393, "train_steps_per_second": 39.31 } ], "logging_steps": 1, "max_steps": 13200, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0654523282939904e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }