|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 68, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14705882352941177, |
|
"grad_norm": 0.9781005831627027, |
|
"learning_rate": 2.8571428571428574e-05, |
|
"loss": 1.1084, |
|
"mean_token_accuracy": 0.7578839182853698, |
|
"num_tokens": 640462.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 0.6955683337172678, |
|
"learning_rate": 3.8032786885245905e-05, |
|
"loss": 0.9348, |
|
"mean_token_accuracy": 0.7874236106872559, |
|
"num_tokens": 1259362.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4411764705882353, |
|
"grad_norm": 1.8154067226337278, |
|
"learning_rate": 3.475409836065574e-05, |
|
"loss": 0.9954, |
|
"mean_token_accuracy": 0.7773380041122436, |
|
"num_tokens": 1895339.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 0.6357695058074742, |
|
"learning_rate": 3.1475409836065576e-05, |
|
"loss": 0.8677, |
|
"mean_token_accuracy": 0.8019283294677735, |
|
"num_tokens": 2505918.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 0.8133712484258966, |
|
"learning_rate": 2.8196721311475412e-05, |
|
"loss": 0.8142, |
|
"mean_token_accuracy": 0.8124492406845093, |
|
"num_tokens": 3153639.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 0.5210471105697568, |
|
"learning_rate": 2.4918032786885248e-05, |
|
"loss": 0.8708, |
|
"mean_token_accuracy": 0.797481119632721, |
|
"num_tokens": 3789164.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0294117647058822, |
|
"grad_norm": 3.7021334939852113, |
|
"learning_rate": 2.1639344262295087e-05, |
|
"loss": 0.846, |
|
"mean_token_accuracy": 0.809739100933075, |
|
"num_tokens": 4419827.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 0.6754015523647303, |
|
"learning_rate": 1.836065573770492e-05, |
|
"loss": 0.7306, |
|
"mean_token_accuracy": 0.8303763628005981, |
|
"num_tokens": 5044302.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.3235294117647058, |
|
"grad_norm": 1.2809188857705796, |
|
"learning_rate": 1.5081967213114754e-05, |
|
"loss": 0.6667, |
|
"mean_token_accuracy": 0.8389277696609497, |
|
"num_tokens": 5677897.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 0.5380085475385603, |
|
"learning_rate": 1.1803278688524591e-05, |
|
"loss": 0.6416, |
|
"mean_token_accuracy": 0.8458507895469666, |
|
"num_tokens": 6307507.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.6176470588235294, |
|
"grad_norm": 0.46342793767814744, |
|
"learning_rate": 8.524590163934427e-06, |
|
"loss": 0.6526, |
|
"mean_token_accuracy": 0.8422300696372986, |
|
"num_tokens": 6947098.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 0.4489151107136719, |
|
"learning_rate": 5.245901639344263e-06, |
|
"loss": 0.5468, |
|
"mean_token_accuracy": 0.8634997725486755, |
|
"num_tokens": 7567565.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.9117647058823528, |
|
"grad_norm": 0.7612436131924979, |
|
"learning_rate": 1.9672131147540985e-06, |
|
"loss": 0.6499, |
|
"mean_token_accuracy": 0.8438345670700074, |
|
"num_tokens": 8199491.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"mean_token_accuracy": 0.8247674107551575, |
|
"num_tokens": 8590054.0, |
|
"step": 68, |
|
"total_flos": 12887022829568.0, |
|
"train_loss": 0.7892868659075569, |
|
"train_runtime": 182.7339, |
|
"train_samples_per_second": 5.899, |
|
"train_steps_per_second": 0.372 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 68, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 12887022829568.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|