|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9577464788732395, |
|
"global_step": 420, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 3.769569158554077, |
|
"eval_runtime": 27.5829, |
|
"eval_samples_per_second": 80.34, |
|
"eval_steps_per_second": 1.269, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 3.7680795192718506, |
|
"eval_runtime": 27.3249, |
|
"eval_samples_per_second": 81.098, |
|
"eval_steps_per_second": 1.281, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 3.7672388553619385, |
|
"eval_runtime": 27.4916, |
|
"eval_samples_per_second": 80.606, |
|
"eval_steps_per_second": 1.273, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 3.765839099884033, |
|
"eval_runtime": 27.4005, |
|
"eval_samples_per_second": 80.874, |
|
"eval_steps_per_second": 1.277, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 3.763303518295288, |
|
"eval_runtime": 27.5296, |
|
"eval_samples_per_second": 80.495, |
|
"eval_steps_per_second": 1.271, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 3.7637836933135986, |
|
"eval_runtime": 27.328, |
|
"eval_samples_per_second": 81.089, |
|
"eval_steps_per_second": 1.281, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 3.760821580886841, |
|
"eval_runtime": 27.5647, |
|
"eval_samples_per_second": 80.393, |
|
"eval_steps_per_second": 1.27, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 3.7594783306121826, |
|
"eval_runtime": 27.3075, |
|
"eval_samples_per_second": 81.15, |
|
"eval_steps_per_second": 1.282, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 3.7594757080078125, |
|
"eval_runtime": 27.4379, |
|
"eval_samples_per_second": 80.764, |
|
"eval_steps_per_second": 1.276, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 3.7583823204040527, |
|
"eval_runtime": 27.5694, |
|
"eval_samples_per_second": 80.379, |
|
"eval_steps_per_second": 1.27, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 3.7562084197998047, |
|
"eval_runtime": 27.3431, |
|
"eval_samples_per_second": 81.044, |
|
"eval_steps_per_second": 1.28, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 3.755629301071167, |
|
"eval_runtime": 27.5374, |
|
"eval_samples_per_second": 80.472, |
|
"eval_steps_per_second": 1.271, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 3.7544658184051514, |
|
"eval_runtime": 27.3291, |
|
"eval_samples_per_second": 81.086, |
|
"eval_steps_per_second": 1.281, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 3.7536823749542236, |
|
"eval_runtime": 27.3625, |
|
"eval_samples_per_second": 80.987, |
|
"eval_steps_per_second": 1.279, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 3.7540736198425293, |
|
"eval_runtime": 27.6591, |
|
"eval_samples_per_second": 80.118, |
|
"eval_steps_per_second": 1.265, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 3.7542569637298584, |
|
"eval_runtime": 27.4748, |
|
"eval_samples_per_second": 80.656, |
|
"eval_steps_per_second": 1.274, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 3.752318859100342, |
|
"eval_runtime": 27.6148, |
|
"eval_samples_per_second": 80.247, |
|
"eval_steps_per_second": 1.267, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 3.7517406940460205, |
|
"eval_runtime": 27.3496, |
|
"eval_samples_per_second": 81.025, |
|
"eval_steps_per_second": 1.28, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 3.751498222351074, |
|
"eval_runtime": 27.5367, |
|
"eval_samples_per_second": 80.474, |
|
"eval_steps_per_second": 1.271, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 3.7509350776672363, |
|
"eval_runtime": 27.354, |
|
"eval_samples_per_second": 81.012, |
|
"eval_steps_per_second": 1.28, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 3.750244140625, |
|
"eval_runtime": 27.3514, |
|
"eval_samples_per_second": 81.02, |
|
"eval_steps_per_second": 1.28, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 3.750495433807373, |
|
"eval_runtime": 27.5933, |
|
"eval_samples_per_second": 80.309, |
|
"eval_steps_per_second": 1.268, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 3.7491180896759033, |
|
"eval_runtime": 27.3222, |
|
"eval_samples_per_second": 81.106, |
|
"eval_steps_per_second": 1.281, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 3.747929096221924, |
|
"eval_runtime": 27.6237, |
|
"eval_samples_per_second": 80.221, |
|
"eval_steps_per_second": 1.267, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 3.749088764190674, |
|
"eval_runtime": 27.2827, |
|
"eval_samples_per_second": 81.224, |
|
"eval_steps_per_second": 1.283, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 3.748143434524536, |
|
"eval_runtime": 27.5685, |
|
"eval_samples_per_second": 80.381, |
|
"eval_steps_per_second": 1.27, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 3.747373580932617, |
|
"eval_runtime": 27.3404, |
|
"eval_samples_per_second": 81.052, |
|
"eval_steps_per_second": 1.28, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 3.7469279766082764, |
|
"eval_runtime": 27.5885, |
|
"eval_samples_per_second": 80.323, |
|
"eval_steps_per_second": 1.269, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 3.7472589015960693, |
|
"eval_runtime": 27.3166, |
|
"eval_samples_per_second": 81.123, |
|
"eval_steps_per_second": 1.281, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 3.7470862865448, |
|
"eval_runtime": 27.2994, |
|
"eval_samples_per_second": 81.174, |
|
"eval_steps_per_second": 1.282, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 3.746790885925293, |
|
"eval_runtime": 27.3969, |
|
"eval_samples_per_second": 80.885, |
|
"eval_steps_per_second": 1.278, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 3.747112989425659, |
|
"eval_runtime": 27.3496, |
|
"eval_samples_per_second": 81.025, |
|
"eval_steps_per_second": 1.28, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 3.747150421142578, |
|
"eval_runtime": 27.5712, |
|
"eval_samples_per_second": 80.374, |
|
"eval_steps_per_second": 1.269, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 3.7463109493255615, |
|
"eval_runtime": 27.3541, |
|
"eval_samples_per_second": 81.012, |
|
"eval_steps_per_second": 1.28, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 3.745861530303955, |
|
"eval_runtime": 27.5761, |
|
"eval_samples_per_second": 80.36, |
|
"eval_steps_per_second": 1.269, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 3.7457809448242188, |
|
"eval_runtime": 27.4138, |
|
"eval_samples_per_second": 80.835, |
|
"eval_steps_per_second": 1.277, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 3.745532751083374, |
|
"eval_runtime": 27.5336, |
|
"eval_samples_per_second": 80.483, |
|
"eval_steps_per_second": 1.271, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 3.7454142570495605, |
|
"eval_runtime": 27.3417, |
|
"eval_samples_per_second": 81.048, |
|
"eval_steps_per_second": 1.28, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 3.7453861236572266, |
|
"eval_runtime": 27.3314, |
|
"eval_samples_per_second": 81.079, |
|
"eval_steps_per_second": 1.281, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 3.7453415393829346, |
|
"eval_runtime": 27.5694, |
|
"eval_samples_per_second": 80.379, |
|
"eval_steps_per_second": 1.27, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 3.745169162750244, |
|
"eval_runtime": 27.3841, |
|
"eval_samples_per_second": 80.923, |
|
"eval_steps_per_second": 1.278, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 3.745060682296753, |
|
"eval_runtime": 27.5883, |
|
"eval_samples_per_second": 80.324, |
|
"eval_steps_per_second": 1.269, |
|
"step": 420 |
|
} |
|
], |
|
"max_steps": 426, |
|
"num_train_epochs": 3, |
|
"total_flos": 875997356359680.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|