|
{ |
|
"best_metric": 3.5847387313842773, |
|
"best_model_checkpoint": "./models/22_12_13_luther_blocks_larger_fp16_20ep/checkpoint-400", |
|
"epoch": 19.984126984126984, |
|
"global_step": 620, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.21560707501060514, |
|
"eval_loss": 4.621785640716553, |
|
"eval_runtime": 1.1004, |
|
"eval_samples_per_second": 48.163, |
|
"eval_steps_per_second": 6.361, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.2338709677419356e-05, |
|
"loss": 8.1175, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.263284088603626, |
|
"eval_loss": 4.0403876304626465, |
|
"eval_runtime": 1.1008, |
|
"eval_samples_per_second": 48.145, |
|
"eval_steps_per_second": 6.359, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.28707648610265774, |
|
"eval_loss": 3.8119544982910156, |
|
"eval_runtime": 1.1015, |
|
"eval_samples_per_second": 48.117, |
|
"eval_steps_per_second": 6.355, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.427419354838709e-05, |
|
"loss": 3.734, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"eval_accuracy": 0.299655102454859, |
|
"eval_loss": 3.7062137126922607, |
|
"eval_runtime": 1.1015, |
|
"eval_samples_per_second": 48.116, |
|
"eval_steps_per_second": 6.355, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.3081576569099393, |
|
"eval_loss": 3.638169765472412, |
|
"eval_runtime": 1.1008, |
|
"eval_samples_per_second": 48.148, |
|
"eval_steps_per_second": 6.359, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 2.620967741935484e-05, |
|
"loss": 3.3639, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_accuracy": 0.3127685866578137, |
|
"eval_loss": 3.610761880874634, |
|
"eval_runtime": 1.1014, |
|
"eval_samples_per_second": 48.121, |
|
"eval_steps_per_second": 6.356, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"eval_accuracy": 0.31479739574687843, |
|
"eval_loss": 3.6012349128723145, |
|
"eval_runtime": 1.1022, |
|
"eval_samples_per_second": 48.084, |
|
"eval_steps_per_second": 6.351, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 1.8145161290322583e-05, |
|
"loss": 3.1363, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"eval_accuracy": 0.31680776111695164, |
|
"eval_loss": 3.5847387313842773, |
|
"eval_runtime": 1.1008, |
|
"eval_samples_per_second": 48.147, |
|
"eval_steps_per_second": 6.359, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"eval_accuracy": 0.31802504657039044, |
|
"eval_loss": 3.5913662910461426, |
|
"eval_runtime": 1.0976, |
|
"eval_samples_per_second": 48.286, |
|
"eval_steps_per_second": 6.377, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 1.0080645161290323e-05, |
|
"loss": 2.9884, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"eval_accuracy": 0.317711503347535, |
|
"eval_loss": 3.5954136848449707, |
|
"eval_runtime": 1.0979, |
|
"eval_samples_per_second": 48.272, |
|
"eval_steps_per_second": 6.376, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"eval_accuracy": 0.317563953595603, |
|
"eval_loss": 3.600076675415039, |
|
"eval_runtime": 1.1024, |
|
"eval_samples_per_second": 48.075, |
|
"eval_steps_per_second": 6.35, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 2.0161290322580646e-06, |
|
"loss": 2.8748, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"eval_accuracy": 0.31879968276803333, |
|
"eval_loss": 3.604796886444092, |
|
"eval_runtime": 1.1023, |
|
"eval_samples_per_second": 48.08, |
|
"eval_steps_per_second": 6.35, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"step": 620, |
|
"total_flos": 5223750303744000.0, |
|
"train_loss": 3.996271416448778, |
|
"train_runtime": 495.8533, |
|
"train_samples_per_second": 20.167, |
|
"train_steps_per_second": 1.25 |
|
} |
|
], |
|
"max_steps": 620, |
|
"num_train_epochs": 20, |
|
"total_flos": 5223750303744000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|