{ "best_metric": 3.5847387313842773, "best_model_checkpoint": "./models/22_12_13_luther_blocks_larger_fp16_20ep/checkpoint-400", "epoch": 19.984126984126984, "global_step": 620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.6, "eval_accuracy": 0.21560707501060514, "eval_loss": 4.621785640716553, "eval_runtime": 1.1004, "eval_samples_per_second": 48.163, "eval_steps_per_second": 6.361, "step": 50 }, { "epoch": 3.22, "learning_rate": 4.2338709677419356e-05, "loss": 8.1175, "step": 100 }, { "epoch": 3.22, "eval_accuracy": 0.263284088603626, "eval_loss": 4.0403876304626465, "eval_runtime": 1.1008, "eval_samples_per_second": 48.145, "eval_steps_per_second": 6.359, "step": 100 }, { "epoch": 4.83, "eval_accuracy": 0.28707648610265774, "eval_loss": 3.8119544982910156, "eval_runtime": 1.1015, "eval_samples_per_second": 48.117, "eval_steps_per_second": 6.355, "step": 150 }, { "epoch": 6.44, "learning_rate": 3.427419354838709e-05, "loss": 3.734, "step": 200 }, { "epoch": 6.44, "eval_accuracy": 0.299655102454859, "eval_loss": 3.7062137126922607, "eval_runtime": 1.1015, "eval_samples_per_second": 48.116, "eval_steps_per_second": 6.355, "step": 200 }, { "epoch": 8.06, "eval_accuracy": 0.3081576569099393, "eval_loss": 3.638169765472412, "eval_runtime": 1.1008, "eval_samples_per_second": 48.148, "eval_steps_per_second": 6.359, "step": 250 }, { "epoch": 9.67, "learning_rate": 2.620967741935484e-05, "loss": 3.3639, "step": 300 }, { "epoch": 9.67, "eval_accuracy": 0.3127685866578137, "eval_loss": 3.610761880874634, "eval_runtime": 1.1014, "eval_samples_per_second": 48.121, "eval_steps_per_second": 6.356, "step": 300 }, { "epoch": 11.29, "eval_accuracy": 0.31479739574687843, "eval_loss": 3.6012349128723145, "eval_runtime": 1.1022, "eval_samples_per_second": 48.084, "eval_steps_per_second": 6.351, "step": 350 }, { "epoch": 12.89, "learning_rate": 1.8145161290322583e-05, "loss": 3.1363, "step": 400 }, { "epoch": 12.89, "eval_accuracy": 0.31680776111695164, "eval_loss": 3.5847387313842773, "eval_runtime": 1.1008, "eval_samples_per_second": 48.147, "eval_steps_per_second": 6.359, "step": 400 }, { "epoch": 14.51, "eval_accuracy": 0.31802504657039044, "eval_loss": 3.5913662910461426, "eval_runtime": 1.0976, "eval_samples_per_second": 48.286, "eval_steps_per_second": 6.377, "step": 450 }, { "epoch": 16.13, "learning_rate": 1.0080645161290323e-05, "loss": 2.9884, "step": 500 }, { "epoch": 16.13, "eval_accuracy": 0.317711503347535, "eval_loss": 3.5954136848449707, "eval_runtime": 1.0979, "eval_samples_per_second": 48.272, "eval_steps_per_second": 6.376, "step": 500 }, { "epoch": 17.73, "eval_accuracy": 0.317563953595603, "eval_loss": 3.600076675415039, "eval_runtime": 1.1024, "eval_samples_per_second": 48.075, "eval_steps_per_second": 6.35, "step": 550 }, { "epoch": 19.35, "learning_rate": 2.0161290322580646e-06, "loss": 2.8748, "step": 600 }, { "epoch": 19.35, "eval_accuracy": 0.31879968276803333, "eval_loss": 3.604796886444092, "eval_runtime": 1.1023, "eval_samples_per_second": 48.08, "eval_steps_per_second": 6.35, "step": 600 }, { "epoch": 19.98, "step": 620, "total_flos": 5223750303744000.0, "train_loss": 3.996271416448778, "train_runtime": 495.8533, "train_samples_per_second": 20.167, "train_steps_per_second": 1.25 } ], "max_steps": 620, "num_train_epochs": 20, "total_flos": 5223750303744000.0, "trial_name": null, "trial_params": null }