{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9577464788732395, "global_step": 420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "eval_loss": 3.769569158554077, "eval_runtime": 27.5829, "eval_samples_per_second": 80.34, "eval_steps_per_second": 1.269, "step": 10 }, { "epoch": 0.14, "eval_loss": 3.7680795192718506, "eval_runtime": 27.3249, "eval_samples_per_second": 81.098, "eval_steps_per_second": 1.281, "step": 20 }, { "epoch": 0.21, "eval_loss": 3.7672388553619385, "eval_runtime": 27.4916, "eval_samples_per_second": 80.606, "eval_steps_per_second": 1.273, "step": 30 }, { "epoch": 0.28, "eval_loss": 3.765839099884033, "eval_runtime": 27.4005, "eval_samples_per_second": 80.874, "eval_steps_per_second": 1.277, "step": 40 }, { "epoch": 0.35, "eval_loss": 3.763303518295288, "eval_runtime": 27.5296, "eval_samples_per_second": 80.495, "eval_steps_per_second": 1.271, "step": 50 }, { "epoch": 0.42, "eval_loss": 3.7637836933135986, "eval_runtime": 27.328, "eval_samples_per_second": 81.089, "eval_steps_per_second": 1.281, "step": 60 }, { "epoch": 0.49, "eval_loss": 3.760821580886841, "eval_runtime": 27.5647, "eval_samples_per_second": 80.393, "eval_steps_per_second": 1.27, "step": 70 }, { "epoch": 0.56, "eval_loss": 3.7594783306121826, "eval_runtime": 27.3075, "eval_samples_per_second": 81.15, "eval_steps_per_second": 1.282, "step": 80 }, { "epoch": 0.63, "eval_loss": 3.7594757080078125, "eval_runtime": 27.4379, "eval_samples_per_second": 80.764, "eval_steps_per_second": 1.276, "step": 90 }, { "epoch": 0.7, "eval_loss": 3.7583823204040527, "eval_runtime": 27.5694, "eval_samples_per_second": 80.379, "eval_steps_per_second": 1.27, "step": 100 }, { "epoch": 0.77, "eval_loss": 3.7562084197998047, "eval_runtime": 27.3431, "eval_samples_per_second": 81.044, "eval_steps_per_second": 1.28, "step": 110 }, { "epoch": 0.85, "eval_loss": 3.755629301071167, "eval_runtime": 27.5374, "eval_samples_per_second": 80.472, "eval_steps_per_second": 1.271, "step": 120 }, { "epoch": 0.92, "eval_loss": 3.7544658184051514, "eval_runtime": 27.3291, "eval_samples_per_second": 81.086, "eval_steps_per_second": 1.281, "step": 130 }, { "epoch": 0.99, "eval_loss": 3.7536823749542236, "eval_runtime": 27.3625, "eval_samples_per_second": 80.987, "eval_steps_per_second": 1.279, "step": 140 }, { "epoch": 1.06, "eval_loss": 3.7540736198425293, "eval_runtime": 27.6591, "eval_samples_per_second": 80.118, "eval_steps_per_second": 1.265, "step": 150 }, { "epoch": 1.13, "eval_loss": 3.7542569637298584, "eval_runtime": 27.4748, "eval_samples_per_second": 80.656, "eval_steps_per_second": 1.274, "step": 160 }, { "epoch": 1.2, "eval_loss": 3.752318859100342, "eval_runtime": 27.6148, "eval_samples_per_second": 80.247, "eval_steps_per_second": 1.267, "step": 170 }, { "epoch": 1.27, "eval_loss": 3.7517406940460205, "eval_runtime": 27.3496, "eval_samples_per_second": 81.025, "eval_steps_per_second": 1.28, "step": 180 }, { "epoch": 1.34, "eval_loss": 3.751498222351074, "eval_runtime": 27.5367, "eval_samples_per_second": 80.474, "eval_steps_per_second": 1.271, "step": 190 }, { "epoch": 1.41, "eval_loss": 3.7509350776672363, "eval_runtime": 27.354, "eval_samples_per_second": 81.012, "eval_steps_per_second": 1.28, "step": 200 }, { "epoch": 1.48, "eval_loss": 3.750244140625, "eval_runtime": 27.3514, "eval_samples_per_second": 81.02, "eval_steps_per_second": 1.28, "step": 210 }, { "epoch": 1.55, "eval_loss": 3.750495433807373, "eval_runtime": 27.5933, "eval_samples_per_second": 80.309, "eval_steps_per_second": 1.268, "step": 220 }, { "epoch": 1.62, "eval_loss": 3.7491180896759033, "eval_runtime": 27.3222, "eval_samples_per_second": 81.106, "eval_steps_per_second": 1.281, "step": 230 }, { "epoch": 1.69, "eval_loss": 3.747929096221924, "eval_runtime": 27.6237, "eval_samples_per_second": 80.221, "eval_steps_per_second": 1.267, "step": 240 }, { "epoch": 1.76, "eval_loss": 3.749088764190674, "eval_runtime": 27.2827, "eval_samples_per_second": 81.224, "eval_steps_per_second": 1.283, "step": 250 }, { "epoch": 1.83, "eval_loss": 3.748143434524536, "eval_runtime": 27.5685, "eval_samples_per_second": 80.381, "eval_steps_per_second": 1.27, "step": 260 }, { "epoch": 1.9, "eval_loss": 3.747373580932617, "eval_runtime": 27.3404, "eval_samples_per_second": 81.052, "eval_steps_per_second": 1.28, "step": 270 }, { "epoch": 1.97, "eval_loss": 3.7469279766082764, "eval_runtime": 27.5885, "eval_samples_per_second": 80.323, "eval_steps_per_second": 1.269, "step": 280 }, { "epoch": 2.04, "eval_loss": 3.7472589015960693, "eval_runtime": 27.3166, "eval_samples_per_second": 81.123, "eval_steps_per_second": 1.281, "step": 290 }, { "epoch": 2.11, "eval_loss": 3.7470862865448, "eval_runtime": 27.2994, "eval_samples_per_second": 81.174, "eval_steps_per_second": 1.282, "step": 300 }, { "epoch": 2.18, "eval_loss": 3.746790885925293, "eval_runtime": 27.3969, "eval_samples_per_second": 80.885, "eval_steps_per_second": 1.278, "step": 310 }, { "epoch": 2.25, "eval_loss": 3.747112989425659, "eval_runtime": 27.3496, "eval_samples_per_second": 81.025, "eval_steps_per_second": 1.28, "step": 320 }, { "epoch": 2.32, "eval_loss": 3.747150421142578, "eval_runtime": 27.5712, "eval_samples_per_second": 80.374, "eval_steps_per_second": 1.269, "step": 330 }, { "epoch": 2.39, "eval_loss": 3.7463109493255615, "eval_runtime": 27.3541, "eval_samples_per_second": 81.012, "eval_steps_per_second": 1.28, "step": 340 }, { "epoch": 2.46, "eval_loss": 3.745861530303955, "eval_runtime": 27.5761, "eval_samples_per_second": 80.36, "eval_steps_per_second": 1.269, "step": 350 }, { "epoch": 2.54, "eval_loss": 3.7457809448242188, "eval_runtime": 27.4138, "eval_samples_per_second": 80.835, "eval_steps_per_second": 1.277, "step": 360 }, { "epoch": 2.61, "eval_loss": 3.745532751083374, "eval_runtime": 27.5336, "eval_samples_per_second": 80.483, "eval_steps_per_second": 1.271, "step": 370 }, { "epoch": 2.68, "eval_loss": 3.7454142570495605, "eval_runtime": 27.3417, "eval_samples_per_second": 81.048, "eval_steps_per_second": 1.28, "step": 380 }, { "epoch": 2.75, "eval_loss": 3.7453861236572266, "eval_runtime": 27.3314, "eval_samples_per_second": 81.079, "eval_steps_per_second": 1.281, "step": 390 }, { "epoch": 2.82, "eval_loss": 3.7453415393829346, "eval_runtime": 27.5694, "eval_samples_per_second": 80.379, "eval_steps_per_second": 1.27, "step": 400 }, { "epoch": 2.89, "eval_loss": 3.745169162750244, "eval_runtime": 27.3841, "eval_samples_per_second": 80.923, "eval_steps_per_second": 1.278, "step": 410 }, { "epoch": 2.96, "eval_loss": 3.745060682296753, "eval_runtime": 27.5883, "eval_samples_per_second": 80.324, "eval_steps_per_second": 1.269, "step": 420 } ], "max_steps": 426, "num_train_epochs": 3, "total_flos": 875997356359680.0, "trial_name": null, "trial_params": null }