{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 28.0,
  "global_step": 14252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.9801178781925346e-05,
      "loss": 2.0175,
      "step": 509
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.9601178781925344e-05,
      "loss": 1.0916,
      "step": 1018
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.9401571709233793e-05,
      "loss": 0.9,
      "step": 1527
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.920196463654224e-05,
      "loss": 0.7942,
      "step": 2036
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.900235756385069e-05,
      "loss": 0.7212,
      "step": 2545
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.8803536345776033e-05,
      "loss": 0.6697,
      "step": 3054
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.860432220039293e-05,
      "loss": 0.6265,
      "step": 3563
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.8407072691552063e-05,
      "loss": 0.6136,
      "step": 4072
    },
    {
      "epoch": 9.0,
      "learning_rate": 1.8207465618860512e-05,
      "loss": 0.8441,
      "step": 4581
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.800943025540275e-05,
      "loss": 1.0052,
      "step": 5090
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.7810216110019647e-05,
      "loss": 0.9929,
      "step": 5599
    },
    {
      "epoch": 12.0,
      "learning_rate": 1.7610609037328093e-05,
      "loss": 0.983,
      "step": 6108
    },
    {
      "epoch": 13.0,
      "learning_rate": 1.7412180746561888e-05,
      "loss": 0.9804,
      "step": 6617
    },
    {
      "epoch": 14.0,
      "learning_rate": 1.7213359528487232e-05,
      "loss": 0.9818,
      "step": 7126
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.701375245579568e-05,
      "loss": 0.9822,
      "step": 7635
    },
    {
      "epoch": 16.0,
      "learning_rate": 1.6814931237721025e-05,
      "loss": 0.9819,
      "step": 8144
    },
    {
      "epoch": 17.0,
      "learning_rate": 1.6614931237721024e-05,
      "loss": 0.9826,
      "step": 8653
    },
    {
      "epoch": 18.0,
      "learning_rate": 1.6416895874263262e-05,
      "loss": 0.9822,
      "step": 9162
    },
    {
      "epoch": 19.0,
      "learning_rate": 1.6218074656188606e-05,
      "loss": 0.9827,
      "step": 9671
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.6019646365422398e-05,
      "loss": 0.9843,
      "step": 10180
    },
    {
      "epoch": 21.0,
      "learning_rate": 1.5820039292730847e-05,
      "loss": 0.9808,
      "step": 10689
    },
    {
      "epoch": 22.0,
      "learning_rate": 1.5620432220039293e-05,
      "loss": 0.9823,
      "step": 11198
    },
    {
      "epoch": 23.0,
      "learning_rate": 1.5420825147347742e-05,
      "loss": 0.9801,
      "step": 11707
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.5222396856581534e-05,
      "loss": 0.9824,
      "step": 12216
    },
    {
      "epoch": 25.0,
      "learning_rate": 1.5022789783889981e-05,
      "loss": 0.9813,
      "step": 12725
    },
    {
      "epoch": 26.0,
      "learning_rate": 1.4823968565815325e-05,
      "loss": 0.9836,
      "step": 13234
    },
    {
      "epoch": 27.0,
      "learning_rate": 1.4624361493123773e-05,
      "loss": 0.9838,
      "step": 13743
    },
    {
      "epoch": 28.0,
      "learning_rate": 1.4425933202357564e-05,
      "loss": 0.9803,
      "step": 14252
    }
  ],
  "max_steps": 50900,
  "num_train_epochs": 100,
  "total_flos": 3675621063214080.0,
  "trial_name": null,
  "trial_params": null
}