{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7561820571018995, "global_step": 3165, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.978494623655914e-05, "loss": 1.0308, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.954599761051374e-05, "loss": 0.7656, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.930704898446834e-05, "loss": 0.8032, "step": 150 }, { "epoch": 0.05, "learning_rate": 1.906810035842294e-05, "loss": 0.7398, "step": 200 }, { "epoch": 0.06, "learning_rate": 1.882915173237754e-05, "loss": 0.7004, "step": 250 }, { "epoch": 0.06, "eval_accuracy": 0.6535433070866141, "eval_loss": 0.7174085378646851, "eval_runtime": 431.9798, "eval_samples_per_second": 2.058, "eval_steps_per_second": 2.058, "step": 250 }, { "epoch": 0.07, "learning_rate": 1.859020310633214e-05, "loss": 0.8009, "step": 300 }, { "epoch": 0.08, "learning_rate": 1.835125448028674e-05, "loss": 0.6551, "step": 350 }, { "epoch": 0.1, "learning_rate": 1.811230585424134e-05, "loss": 0.6348, "step": 400 }, { "epoch": 0.11, "learning_rate": 1.787335722819594e-05, "loss": 0.5236, "step": 450 }, { "epoch": 0.12, "learning_rate": 1.763440860215054e-05, "loss": 0.6735, "step": 500 }, { "epoch": 0.12, "eval_accuracy": 0.7007874015748031, "eval_loss": 0.6178467869758606, "eval_runtime": 430.5762, "eval_samples_per_second": 2.065, "eval_steps_per_second": 2.065, "step": 500 }, { "epoch": 0.13, "learning_rate": 1.7395459976105136e-05, "loss": 0.7371, "step": 550 }, { "epoch": 0.14, "learning_rate": 1.7156511350059738e-05, "loss": 0.6957, "step": 600 }, { "epoch": 0.16, "learning_rate": 1.691756272401434e-05, "loss": 0.592, "step": 650 }, { "epoch": 0.17, "learning_rate": 1.6678614097968937e-05, "loss": 0.5799, "step": 700 }, { "epoch": 0.18, "learning_rate": 1.6439665471923538e-05, "loss": 0.6165, "step": 750 }, { "epoch": 0.18, "eval_accuracy": 0.7424071991001124, "eval_loss": 0.5588846206665039, "eval_runtime": 431.475, "eval_samples_per_second": 2.06, "eval_steps_per_second": 2.06, "step": 750 }, { "epoch": 0.19, "learning_rate": 1.6200716845878136e-05, "loss": 0.5045, "step": 800 }, { "epoch": 0.2, "learning_rate": 1.5961768219832737e-05, "loss": 0.6213, "step": 850 }, { "epoch": 0.22, "learning_rate": 1.5722819593787338e-05, "loss": 0.6248, "step": 900 }, { "epoch": 0.23, "learning_rate": 1.5483870967741936e-05, "loss": 0.4998, "step": 950 }, { "epoch": 0.24, "learning_rate": 1.5244922341696537e-05, "loss": 0.6603, "step": 1000 }, { "epoch": 0.24, "eval_accuracy": 0.7559055118110236, "eval_loss": 0.5710476636886597, "eval_runtime": 432.0116, "eval_samples_per_second": 2.058, "eval_steps_per_second": 2.058, "step": 1000 }, { "epoch": 0.25, "learning_rate": 1.5005973715651137e-05, "loss": 0.5468, "step": 1050 }, { "epoch": 0.26, "learning_rate": 1.4767025089605736e-05, "loss": 0.511, "step": 1100 }, { "epoch": 0.27, "learning_rate": 1.4528076463560337e-05, "loss": 0.5575, "step": 1150 }, { "epoch": 0.29, "learning_rate": 1.4289127837514935e-05, "loss": 0.7658, "step": 1200 }, { "epoch": 0.3, "learning_rate": 1.4050179211469535e-05, "loss": 0.5575, "step": 1250 }, { "epoch": 0.3, "eval_accuracy": 0.7570303712035995, "eval_loss": 0.5421488881111145, "eval_runtime": 430.448, "eval_samples_per_second": 2.065, "eval_steps_per_second": 2.065, "step": 1250 }, { "epoch": 0.31, "learning_rate": 1.3811230585424136e-05, "loss": 0.6252, "step": 1300 }, { "epoch": 0.32, "learning_rate": 1.3572281959378735e-05, "loss": 0.4673, "step": 1350 }, { "epoch": 0.33, "learning_rate": 1.3333333333333333e-05, "loss": 0.5266, "step": 1400 }, { "epoch": 0.35, "learning_rate": 1.3094384707287935e-05, "loss": 0.6353, "step": 1450 }, { "epoch": 0.36, "learning_rate": 1.2855436081242534e-05, "loss": 0.589, "step": 1500 }, { "epoch": 0.36, "eval_accuracy": 0.750281214848144, "eval_loss": 0.5329739451408386, "eval_runtime": 429.8372, "eval_samples_per_second": 2.068, "eval_steps_per_second": 2.068, "step": 1500 }, { "epoch": 0.37, "learning_rate": 1.2616487455197134e-05, "loss": 0.6944, "step": 1550 }, { "epoch": 0.38, "learning_rate": 1.2377538829151735e-05, "loss": 0.4992, "step": 1600 }, { "epoch": 0.39, "learning_rate": 1.2138590203106333e-05, "loss": 0.6429, "step": 1650 }, { "epoch": 0.41, "learning_rate": 1.1899641577060932e-05, "loss": 0.5243, "step": 1700 }, { "epoch": 0.42, "learning_rate": 1.1660692951015533e-05, "loss": 0.5644, "step": 1750 }, { "epoch": 0.42, "eval_accuracy": 0.7559055118110236, "eval_loss": 0.5390347838401794, "eval_runtime": 429.8773, "eval_samples_per_second": 2.068, "eval_steps_per_second": 2.068, "step": 1750 }, { "epoch": 0.43, "learning_rate": 1.1421744324970133e-05, "loss": 0.6321, "step": 1800 }, { "epoch": 0.44, "learning_rate": 1.118279569892473e-05, "loss": 0.5939, "step": 1850 }, { "epoch": 0.45, "learning_rate": 1.0943847072879332e-05, "loss": 0.5477, "step": 1900 }, { "epoch": 0.47, "learning_rate": 1.0704898446833931e-05, "loss": 0.5171, "step": 1950 }, { "epoch": 0.48, "learning_rate": 1.0465949820788533e-05, "loss": 0.503, "step": 2000 }, { "epoch": 0.48, "eval_accuracy": 0.7592800899887514, "eval_loss": 0.5502843856811523, "eval_runtime": 429.7289, "eval_samples_per_second": 2.069, "eval_steps_per_second": 2.069, "step": 2000 }, { "epoch": 0.49, "learning_rate": 1.0227001194743132e-05, "loss": 0.6059, "step": 2050 }, { "epoch": 0.5, "learning_rate": 9.98805256869773e-06, "loss": 0.5716, "step": 2100 }, { "epoch": 0.51, "learning_rate": 9.749103942652331e-06, "loss": 0.5367, "step": 2150 }, { "epoch": 0.53, "learning_rate": 9.51015531660693e-06, "loss": 0.578, "step": 2200 }, { "epoch": 0.54, "learning_rate": 9.27120669056153e-06, "loss": 0.6361, "step": 2250 }, { "epoch": 0.54, "eval_accuracy": 0.7637795275590551, "eval_loss": 0.5347180366516113, "eval_runtime": 432.0598, "eval_samples_per_second": 2.058, "eval_steps_per_second": 2.058, "step": 2250 }, { "epoch": 0.55, "learning_rate": 9.03225806451613e-06, "loss": 0.5387, "step": 2300 }, { "epoch": 0.56, "learning_rate": 8.793309438470729e-06, "loss": 0.5409, "step": 2350 }, { "epoch": 0.57, "learning_rate": 8.55436081242533e-06, "loss": 0.4895, "step": 2400 }, { "epoch": 0.59, "learning_rate": 8.315412186379928e-06, "loss": 0.5966, "step": 2450 }, { "epoch": 0.6, "learning_rate": 8.07646356033453e-06, "loss": 0.517, "step": 2500 }, { "epoch": 0.6, "eval_accuracy": 0.7649043869516311, "eval_loss": 0.5409161448478699, "eval_runtime": 432.0753, "eval_samples_per_second": 2.058, "eval_steps_per_second": 2.058, "step": 2500 }, { "epoch": 0.61, "learning_rate": 7.837514934289129e-06, "loss": 0.5852, "step": 2550 }, { "epoch": 0.62, "learning_rate": 7.5985663082437275e-06, "loss": 0.5853, "step": 2600 }, { "epoch": 0.63, "learning_rate": 7.359617682198328e-06, "loss": 0.6096, "step": 2650 }, { "epoch": 0.65, "learning_rate": 7.120669056152928e-06, "loss": 0.6285, "step": 2700 }, { "epoch": 0.66, "learning_rate": 6.881720430107528e-06, "loss": 0.5481, "step": 2750 }, { "epoch": 0.66, "eval_accuracy": 0.7525309336332958, "eval_loss": 0.5184136033058167, "eval_runtime": 431.9074, "eval_samples_per_second": 2.058, "eval_steps_per_second": 2.058, "step": 2750 }, { "epoch": 0.67, "learning_rate": 6.642771804062127e-06, "loss": 0.6294, "step": 2800 }, { "epoch": 0.68, "learning_rate": 6.403823178016727e-06, "loss": 0.5585, "step": 2850 }, { "epoch": 0.69, "learning_rate": 6.164874551971327e-06, "loss": 0.5914, "step": 2900 }, { "epoch": 0.7, "learning_rate": 5.925925925925926e-06, "loss": 0.5831, "step": 2950 }, { "epoch": 0.72, "learning_rate": 5.686977299880526e-06, "loss": 0.6036, "step": 3000 }, { "epoch": 0.72, "eval_accuracy": 0.7592800899887514, "eval_loss": 0.5061925649642944, "eval_runtime": 431.9996, "eval_samples_per_second": 2.058, "eval_steps_per_second": 2.058, "step": 3000 }, { "epoch": 0.73, "learning_rate": 5.4480286738351265e-06, "loss": 0.4684, "step": 3050 }, { "epoch": 0.74, "learning_rate": 5.209080047789725e-06, "loss": 0.5459, "step": 3100 }, { "epoch": 0.75, "learning_rate": 4.9701314217443256e-06, "loss": 0.5358, "step": 3150 } ], "max_steps": 4185, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }