|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9869281045751634, |
|
"eval_steps": 500, |
|
"global_step": 228, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13071895424836602, |
|
"grad_norm": 13.640703803287584, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9844, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26143790849673204, |
|
"grad_norm": 0.9447823765191696, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8863, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 1.038467083575288, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8449, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5228758169934641, |
|
"grad_norm": 1.6427820915875102, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8257, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6535947712418301, |
|
"grad_norm": 1.7936488531165335, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8169, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 0.7697816321414731, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8056, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9150326797385621, |
|
"grad_norm": 1.250994290613137, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7971, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0490196078431373, |
|
"grad_norm": 0.843433753244107, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8367, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1797385620915033, |
|
"grad_norm": 0.6027656670101825, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7483, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3104575163398693, |
|
"grad_norm": 0.8024576817469242, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7428, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4411764705882353, |
|
"grad_norm": 0.7594730875779195, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7401, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5718954248366013, |
|
"grad_norm": 0.5293998500966177, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7426, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.7026143790849673, |
|
"grad_norm": 0.9249042901353932, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7406, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.5767932991870924, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7379, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.9640522875816995, |
|
"grad_norm": 0.5974379920218519, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7337, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0980392156862746, |
|
"grad_norm": 1.3799060310730653, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7541, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.2287581699346406, |
|
"grad_norm": 0.8440697657265467, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6849, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.3594771241830066, |
|
"grad_norm": 0.6523664577578698, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6828, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.4901960784313726, |
|
"grad_norm": 0.5604368514967889, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6833, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.6209150326797386, |
|
"grad_norm": 0.651015676014187, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.7516339869281046, |
|
"grad_norm": 0.6331718263692562, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6826, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.8823529411764706, |
|
"grad_norm": 0.6748382635791591, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6867, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.9869281045751634, |
|
"step": 228, |
|
"total_flos": 381489732648960.0, |
|
"train_loss": 0.7633350188272041, |
|
"train_runtime": 3333.6415, |
|
"train_samples_per_second": 35.22, |
|
"train_steps_per_second": 0.068 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 228, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 381489732648960.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|