|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.0, |
|
"global_step": 1650, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8713, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 74.73982970671712, |
|
"eval_f1": 83.85927956580818, |
|
"eval_runtime": 4.7071, |
|
"eval_samples_per_second": 2257.456, |
|
"eval_steps_per_second": 7.223, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0257, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 78.6092715231788, |
|
"eval_f1": 86.65693824950102, |
|
"eval_runtime": 3.83, |
|
"eval_samples_per_second": 2774.405, |
|
"eval_steps_per_second": 8.877, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8274, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 79.36613055818354, |
|
"eval_f1": 87.32332344288875, |
|
"eval_runtime": 3.8102, |
|
"eval_samples_per_second": 2788.796, |
|
"eval_steps_per_second": 8.923, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6861, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 79.39451277199622, |
|
"eval_f1": 87.21948865558768, |
|
"eval_runtime": 3.8507, |
|
"eval_samples_per_second": 2759.476, |
|
"eval_steps_per_second": 8.829, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5711, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 79.05392620624409, |
|
"eval_f1": 87.26064183346908, |
|
"eval_runtime": 3.8281, |
|
"eval_samples_per_second": 2775.791, |
|
"eval_steps_per_second": 8.882, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4746, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 79.16745506149479, |
|
"eval_f1": 87.46509960322021, |
|
"eval_runtime": 3.8602, |
|
"eval_samples_per_second": 2752.71, |
|
"eval_steps_per_second": 8.808, |
|
"step": 1650 |
|
} |
|
], |
|
"max_steps": 5500, |
|
"num_train_epochs": 20, |
|
"total_flos": 198561583071232.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|