|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 10863, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7698609960416095e-05, |
|
"loss": 0.4597, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.539721992083218e-05, |
|
"loss": 0.3231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.309582988124828e-05, |
|
"loss": 0.3073, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.079443984166437e-05, |
|
"loss": 0.2928, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.849304980208046e-05, |
|
"loss": 0.2808, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.619165976249655e-05, |
|
"loss": 0.275, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.389026972291264e-05, |
|
"loss": 0.2675, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1588879683328734e-05, |
|
"loss": 0.2614, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9287489643744827e-05, |
|
"loss": 0.2584, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6986099604160913e-05, |
|
"loss": 0.2555, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4684709564577006e-05, |
|
"loss": 0.2532, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2383319524993096e-05, |
|
"loss": 0.2496, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0081929485409186e-05, |
|
"loss": 0.2483, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.778053944582528e-05, |
|
"loss": 0.2457, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.547914940624137e-05, |
|
"loss": 0.2454, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.317775936665746e-05, |
|
"loss": 0.2441, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0876369327073553e-05, |
|
"loss": 0.2426, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.574979287489644e-06, |
|
"loss": 0.2427, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.273589247905736e-06, |
|
"loss": 0.2416, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.972199208321826e-06, |
|
"loss": 0.2398, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6708091687379177e-06, |
|
"loss": 0.2409, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 10863, |
|
"total_flos": 8305395513163776.0, |
|
"train_runtime": 2923.7518, |
|
"train_samples_per_second": 3.715 |
|
} |
|
], |
|
"max_steps": 10863, |
|
"num_train_epochs": 1, |
|
"total_flos": 8305395513163776.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|