|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.888888888888889, |
|
"eval_steps": 500, |
|
"global_step": 28, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25396825396825395, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.642857142857143e-05, |
|
"loss": 72.1443, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.5079365079365079, |
|
"grad_norm": 19.469039916992188, |
|
"learning_rate": 8.92857142857143e-05, |
|
"loss": 72.0489, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 20.524240493774414, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 69.7797, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.126984126984127, |
|
"grad_norm": 49.45151138305664, |
|
"learning_rate": 7.857142857142858e-05, |
|
"loss": 89.8185, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.380952380952381, |
|
"grad_norm": 32.59318161010742, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 62.2748, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.6349206349206349, |
|
"grad_norm": 32.76274871826172, |
|
"learning_rate": 6.428571428571429e-05, |
|
"loss": 57.2675, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 36.317630767822266, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 56.7585, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.253968253968254, |
|
"grad_norm": 35.975807189941406, |
|
"learning_rate": 5e-05, |
|
"loss": 74.1119, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.507936507936508, |
|
"grad_norm": 20.22718620300293, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 49.704, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.761904761904762, |
|
"grad_norm": 18.7318172454834, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 49.5238, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.126984126984127, |
|
"grad_norm": 34.112125396728516, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 66.7037, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 3.380952380952381, |
|
"grad_norm": 19.139488220214844, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 46.9201, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 3.634920634920635, |
|
"grad_norm": 18.72429656982422, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 47.8016, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 17.140914916992188, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 46.4582, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"step": 28, |
|
"total_flos": 445653973809192.0, |
|
"train_loss": 61.52253450666155, |
|
"train_runtime": 1057.0707, |
|
"train_samples_per_second": 0.477, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 28, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 445653973809192.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|