{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9948186528497409,
  "eval_steps": 100,
  "global_step": 168,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029607698001480384,
      "grad_norm": 0.29335439463795626,
      "learning_rate": 5.882352941176471e-06,
      "loss": 1.0993,
      "step": 5
    },
    {
      "epoch": 0.05921539600296077,
      "grad_norm": 0.24974276882783836,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 1.0434,
      "step": 10
    },
    {
      "epoch": 0.08882309400444116,
      "grad_norm": 0.13249460225804482,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.9668,
      "step": 15
    },
    {
      "epoch": 0.11843079200592153,
      "grad_norm": 0.09803049524347336,
      "learning_rate": 1.9980527694749952e-05,
      "loss": 0.9077,
      "step": 20
    },
    {
      "epoch": 0.14803849000740193,
      "grad_norm": 0.07618540921622716,
      "learning_rate": 1.986180478852149e-05,
      "loss": 0.8708,
      "step": 25
    },
    {
      "epoch": 0.17764618800888232,
      "grad_norm": 0.06017940731234282,
      "learning_rate": 1.963645895935632e-05,
      "loss": 0.8417,
      "step": 30
    },
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 0.05030045847341557,
      "learning_rate": 1.930692657985482e-05,
      "loss": 0.8355,
      "step": 35
    },
    {
      "epoch": 0.23686158401184307,
      "grad_norm": 0.04490192945997187,
      "learning_rate": 1.887677045685188e-05,
      "loss": 0.8342,
      "step": 40
    },
    {
      "epoch": 0.2664692820133235,
      "grad_norm": 0.04289421891660055,
      "learning_rate": 1.8350641311400813e-05,
      "loss": 0.8182,
      "step": 45
    },
    {
      "epoch": 0.29607698001480387,
      "grad_norm": 0.042583173233877836,
      "learning_rate": 1.773422749654988e-05,
      "loss": 0.8081,
      "step": 50
    },
    {
      "epoch": 0.32568467801628426,
      "grad_norm": 0.042418491223949785,
      "learning_rate": 1.7034193496547903e-05,
      "loss": 0.7974,
      "step": 55
    },
    {
      "epoch": 0.35529237601776464,
      "grad_norm": 0.04318659192859987,
      "learning_rate": 1.6258107872407376e-05,
      "loss": 0.8074,
      "step": 60
    },
    {
      "epoch": 0.38490007401924503,
      "grad_norm": 0.04186212190455867,
      "learning_rate": 1.5414361432856475e-05,
      "loss": 0.7811,
      "step": 65
    },
    {
      "epoch": 0.41450777202072536,
      "grad_norm": 0.03891429842772997,
      "learning_rate": 1.4512076515391375e-05,
      "loss": 0.7829,
      "step": 70
    },
    {
      "epoch": 0.44411547002220575,
      "grad_norm": 0.043245677806225846,
      "learning_rate": 1.356100835825547e-05,
      "loss": 0.7939,
      "step": 75
    },
    {
      "epoch": 0.47372316802368614,
      "grad_norm": 0.0417510435796824,
      "learning_rate": 1.257143962968246e-05,
      "loss": 0.7752,
      "step": 80
    },
    {
      "epoch": 0.5033308660251665,
      "grad_norm": 0.04184839517393442,
      "learning_rate": 1.155406925472205e-05,
      "loss": 0.7791,
      "step": 85
    },
    {
      "epoch": 0.532938564026647,
      "grad_norm": 0.04114645929545094,
      "learning_rate": 1.0519896741619803e-05,
      "loss": 0.7783,
      "step": 90
    },
    {
      "epoch": 0.5625462620281273,
      "grad_norm": 0.041770371505857336,
      "learning_rate": 9.480103258380198e-06,
      "loss": 0.7855,
      "step": 95
    },
    {
      "epoch": 0.5921539600296077,
      "grad_norm": 0.04114281633366516,
      "learning_rate": 8.445930745277953e-06,
      "loss": 0.7612,
      "step": 100
    },
    {
      "epoch": 0.5921539600296077,
      "eval_loss": 0.7964568138122559,
      "eval_runtime": 1.3496,
      "eval_samples_per_second": 94.841,
      "eval_steps_per_second": 2.964,
      "step": 100
    },
    {
      "epoch": 0.6217616580310881,
      "grad_norm": 0.040839116887339516,
      "learning_rate": 7.428560370317542e-06,
      "loss": 0.7651,
      "step": 105
    },
    {
      "epoch": 0.6513693560325685,
      "grad_norm": 0.0398495836348172,
      "learning_rate": 6.438991641744531e-06,
      "loss": 0.7763,
      "step": 110
    },
    {
      "epoch": 0.6809770540340488,
      "grad_norm": 0.03775265832702839,
      "learning_rate": 5.487923484608629e-06,
      "loss": 0.7708,
      "step": 115
    },
    {
      "epoch": 0.7105847520355293,
      "grad_norm": 0.03691773787087903,
      "learning_rate": 4.5856385671435285e-06,
      "loss": 0.7598,
      "step": 120
    },
    {
      "epoch": 0.7401924500370096,
      "grad_norm": 0.0393036985818084,
      "learning_rate": 3.7418921275926245e-06,
      "loss": 0.7622,
      "step": 125
    },
    {
      "epoch": 0.7698001480384901,
      "grad_norm": 0.037994882364357475,
      "learning_rate": 2.965806503452098e-06,
      "loss": 0.7638,
      "step": 130
    },
    {
      "epoch": 0.7994078460399704,
      "grad_norm": 0.04067457252715543,
      "learning_rate": 2.265772503450122e-06,
      "loss": 0.7669,
      "step": 135
    },
    {
      "epoch": 0.8290155440414507,
      "grad_norm": 0.03761393898606893,
      "learning_rate": 1.6493586885991908e-06,
      "loss": 0.7583,
      "step": 140
    },
    {
      "epoch": 0.8586232420429312,
      "grad_norm": 0.03804818743072382,
      "learning_rate": 1.1232295431481222e-06,
      "loss": 0.7651,
      "step": 145
    },
    {
      "epoch": 0.8882309400444115,
      "grad_norm": 0.03722689725275752,
      "learning_rate": 6.930734201451817e-07,
      "loss": 0.7751,
      "step": 150
    },
    {
      "epoch": 0.9178386380458919,
      "grad_norm": 0.03700633814993851,
      "learning_rate": 3.635410406436857e-07,
      "loss": 0.7699,
      "step": 155
    },
    {
      "epoch": 0.9474463360473723,
      "grad_norm": 0.035679947322838766,
      "learning_rate": 1.3819521147851122e-07,
      "loss": 0.7691,
      "step": 160
    },
    {
      "epoch": 0.9770540340488527,
      "grad_norm": 0.03450691947549809,
      "learning_rate": 1.947230525005006e-08,
      "loss": 0.753,
      "step": 165
    },
    {
      "epoch": 0.9948186528497409,
      "step": 168,
      "total_flos": 76517996494848.0,
      "train_loss": 0.8114972341628301,
      "train_runtime": 720.0182,
      "train_samples_per_second": 30.013,
      "train_steps_per_second": 0.233
    }
  ],
  "logging_steps": 5,
  "max_steps": 168,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 76517996494848.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}