{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0411962160512664, "global_step": 540, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "eval_loss": 3.030700922012329, "eval_runtime": 51.1742, "eval_samples_per_second": 6.116, "eval_steps_per_second": 0.391, "step": 20 }, { "epoch": 0.0, "eval_loss": 2.9279651641845703, "eval_runtime": 96.1374, "eval_samples_per_second": 3.256, "eval_steps_per_second": 0.416, "step": 40 }, { "epoch": 0.0, "eval_loss": 2.7418131828308105, "eval_runtime": 95.6416, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.418, "step": 60 }, { "epoch": 0.0, "eval_loss": 2.666383743286133, "eval_runtime": 95.265, "eval_samples_per_second": 3.286, "eval_steps_per_second": 0.42, "step": 80 }, { "epoch": 0.01, "eval_loss": 2.5859625339508057, "eval_runtime": 96.3364, "eval_samples_per_second": 3.249, "eval_steps_per_second": 0.415, "step": 100 }, { "epoch": 0.01, "eval_loss": 2.533696174621582, "eval_runtime": 94.072, "eval_samples_per_second": 3.327, "eval_steps_per_second": 0.425, "step": 120 }, { "epoch": 0.01, "eval_loss": 2.4880690574645996, "eval_runtime": 97.015, "eval_samples_per_second": 3.226, "eval_steps_per_second": 0.412, "step": 140 }, { "epoch": 0.01, "eval_loss": 2.484375, "eval_runtime": 94.9062, "eval_samples_per_second": 3.298, "eval_steps_per_second": 0.421, "step": 160 }, { "epoch": 0.01, "eval_loss": 2.444089412689209, "eval_runtime": 96.2311, "eval_samples_per_second": 3.253, "eval_steps_per_second": 0.416, "step": 180 }, { "epoch": 0.02, "eval_loss": 2.4108924865722656, "eval_runtime": 94.1107, "eval_samples_per_second": 3.326, "eval_steps_per_second": 0.425, "step": 200 }, { "epoch": 0.02, "eval_loss": 2.4031050205230713, "eval_runtime": 96.8698, "eval_samples_per_second": 3.231, "eval_steps_per_second": 0.413, "step": 220 }, { "epoch": 0.02, "eval_loss": 2.3866562843322754, "eval_runtime": 94.3449, "eval_samples_per_second": 3.318, "eval_steps_per_second": 0.424, "step": 240 }, { "epoch": 0.02, "eval_loss": 2.364941120147705, "eval_runtime": 94.3238, "eval_samples_per_second": 3.318, "eval_steps_per_second": 0.424, "step": 260 }, { "epoch": 0.02, "eval_loss": 2.3453474044799805, "eval_runtime": 95.7368, "eval_samples_per_second": 3.269, "eval_steps_per_second": 0.418, "step": 280 }, { "epoch": 0.02, "eval_loss": 2.3416035175323486, "eval_runtime": 94.4091, "eval_samples_per_second": 3.315, "eval_steps_per_second": 0.424, "step": 300 }, { "epoch": 0.02, "eval_loss": 2.328873872756958, "eval_runtime": 98.9931, "eval_samples_per_second": 3.162, "eval_steps_per_second": 0.404, "step": 320 }, { "epoch": 0.03, "eval_loss": 2.314546823501587, "eval_runtime": 98.9879, "eval_samples_per_second": 3.162, "eval_steps_per_second": 0.404, "step": 340 }, { "epoch": 0.03, "eval_loss": 2.3136730194091797, "eval_runtime": 97.3738, "eval_samples_per_second": 3.214, "eval_steps_per_second": 0.411, "step": 360 }, { "epoch": 0.03, "eval_loss": 2.3047373294830322, "eval_runtime": 95.8162, "eval_samples_per_second": 3.267, "eval_steps_per_second": 0.417, "step": 380 }, { "epoch": 0.03, "eval_loss": 2.2875399589538574, "eval_runtime": 96.4936, "eval_samples_per_second": 3.244, "eval_steps_per_second": 0.415, "step": 400 }, { "epoch": 0.03, "eval_loss": 2.28057599067688, "eval_runtime": 96.1017, "eval_samples_per_second": 3.257, "eval_steps_per_second": 0.416, "step": 420 }, { "epoch": 0.03, "eval_loss": 2.2859673500061035, "eval_runtime": 94.3087, "eval_samples_per_second": 3.319, "eval_steps_per_second": 0.424, "step": 440 }, { "epoch": 0.04, "eval_loss": 2.2626798152923584, "eval_runtime": 94.7992, "eval_samples_per_second": 3.302, "eval_steps_per_second": 0.422, "step": 460 }, { "epoch": 0.04, "eval_loss": 2.2764577865600586, "eval_runtime": 95.9001, "eval_samples_per_second": 3.264, "eval_steps_per_second": 0.417, "step": 480 }, { "epoch": 0.04, "learning_rate": 5e-05, "loss": 2.1752, "step": 500 }, { "epoch": 0.04, "eval_loss": 2.270517110824585, "eval_runtime": 95.5609, "eval_samples_per_second": 3.275, "eval_steps_per_second": 0.419, "step": 500 }, { "epoch": 0.04, "eval_loss": 2.2533695697784424, "eval_runtime": 95.989, "eval_samples_per_second": 3.261, "eval_steps_per_second": 0.417, "step": 520 }, { "epoch": 0.04, "eval_loss": 2.2560901641845703, "eval_runtime": 95.7778, "eval_samples_per_second": 3.268, "eval_steps_per_second": 0.418, "step": 540 } ], "max_steps": 13108, "num_train_epochs": 1, "total_flos": 7509129560064.0, "trial_name": null, "trial_params": null }