|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.1882656350741457, |
|
"eval_loss": 5.243511199951172, |
|
"eval_runtime": 4.8686, |
|
"eval_samples_per_second": 48.268, |
|
"eval_steps_per_second": 6.162, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.2259354006780225, |
|
"eval_loss": 4.6670308113098145, |
|
"eval_runtime": 4.8738, |
|
"eval_samples_per_second": 48.217, |
|
"eval_steps_per_second": 6.155, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.24813543811484787, |
|
"eval_loss": 4.436723709106445, |
|
"eval_runtime": 4.8864, |
|
"eval_samples_per_second": 48.093, |
|
"eval_steps_per_second": 6.14, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy": 0.25969925750296374, |
|
"eval_loss": 4.3258056640625, |
|
"eval_runtime": 4.8712, |
|
"eval_samples_per_second": 48.243, |
|
"eval_steps_per_second": 6.159, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.772727272727273e-05, |
|
"loss": 6.175, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy": 0.26481562363511574, |
|
"eval_loss": 4.270518779754639, |
|
"eval_runtime": 4.8757, |
|
"eval_samples_per_second": 48.198, |
|
"eval_steps_per_second": 6.153, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.26965745304798155, |
|
"eval_loss": 4.2317938804626465, |
|
"eval_runtime": 4.8861, |
|
"eval_samples_per_second": 48.096, |
|
"eval_steps_per_second": 6.14, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_accuracy": 0.2718995029221522, |
|
"eval_loss": 4.208972454071045, |
|
"eval_runtime": 4.8935, |
|
"eval_samples_per_second": 48.023, |
|
"eval_steps_per_second": 6.131, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_accuracy": 0.27399596514215596, |
|
"eval_loss": 4.191540718078613, |
|
"eval_runtime": 4.8923, |
|
"eval_samples_per_second": 48.035, |
|
"eval_steps_per_second": 6.132, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_accuracy": 0.2745658368170379, |
|
"eval_loss": 4.18229866027832, |
|
"eval_runtime": 4.8912, |
|
"eval_samples_per_second": 48.045, |
|
"eval_steps_per_second": 6.133, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 5e-06, |
|
"loss": 3.9191, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_accuracy": 0.2756639836941827, |
|
"eval_loss": 4.17605447769165, |
|
"eval_runtime": 4.8985, |
|
"eval_samples_per_second": 47.974, |
|
"eval_steps_per_second": 6.124, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.27604251159501675, |
|
"eval_loss": 4.173768997192383, |
|
"eval_runtime": 4.8913, |
|
"eval_samples_per_second": 48.044, |
|
"eval_steps_per_second": 6.133, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 550, |
|
"total_flos": 4585675161600000.0, |
|
"train_loss": 4.937346829501065, |
|
"train_runtime": 481.6631, |
|
"train_samples_per_second": 18.218, |
|
"train_steps_per_second": 1.142 |
|
} |
|
], |
|
"max_steps": 550, |
|
"num_train_epochs": 5, |
|
"total_flos": 4585675161600000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|