{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.23494860499265785,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011747430249632892,
      "grad_norm": 1.5699902772903442,
      "learning_rate": 0.00027,
      "loss": 3.0983,
      "step": 10
    },
    {
      "epoch": 0.023494860499265784,
      "grad_norm": 1.6029695272445679,
      "learning_rate": 0.00029991523567092526,
      "loss": 2.062,
      "step": 20
    },
    {
      "epoch": 0.03524229074889868,
      "grad_norm": 1.593436360359192,
      "learning_rate": 0.00029962234616583063,
      "loss": 1.2074,
      "step": 30
    },
    {
      "epoch": 0.04698972099853157,
      "grad_norm": 0.5851414799690247,
      "learning_rate": 0.00029912069357315393,
      "loss": 0.888,
      "step": 40
    },
    {
      "epoch": 0.05873715124816446,
      "grad_norm": 0.25992292165756226,
      "learning_rate": 0.0002984109778320875,
      "loss": 0.7685,
      "step": 50
    },
    {
      "epoch": 0.07048458149779736,
      "grad_norm": 0.21082307398319244,
      "learning_rate": 0.00029749418918542057,
      "loss": 0.7096,
      "step": 60
    },
    {
      "epoch": 0.08223201174743025,
      "grad_norm": 0.16843102872371674,
      "learning_rate": 0.0002963716067978866,
      "loss": 0.6901,
      "step": 70
    },
    {
      "epoch": 0.09397944199706314,
      "grad_norm": 0.12076722830533981,
      "learning_rate": 0.000295044796971387,
      "loss": 0.6702,
      "step": 80
    },
    {
      "epoch": 0.10572687224669604,
      "grad_norm": 0.21371866762638092,
      "learning_rate": 0.000293515610959582,
      "loss": 0.6353,
      "step": 90
    },
    {
      "epoch": 0.11747430249632893,
      "grad_norm": 0.13458965718746185,
      "learning_rate": 0.0002917861823848985,
      "loss": 0.6479,
      "step": 100
    },
    {
      "epoch": 0.12922173274596183,
      "grad_norm": 0.265765517950058,
      "learning_rate": 0.0002898589242615568,
      "loss": 0.6244,
      "step": 110
    },
    {
      "epoch": 0.14096916299559473,
      "grad_norm": 0.1473032385110855,
      "learning_rate": 0.0002877365256287728,
      "loss": 0.6217,
      "step": 120
    },
    {
      "epoch": 0.1527165932452276,
      "grad_norm": 0.1591167151927948,
      "learning_rate": 0.00028542194779883047,
      "loss": 0.6022,
      "step": 130
    },
    {
      "epoch": 0.1644640234948605,
      "grad_norm": 0.13270772993564606,
      "learning_rate": 0.00028291842022526133,
      "loss": 0.6098,
      "step": 140
    },
    {
      "epoch": 0.1762114537444934,
      "grad_norm": 0.1444919854402542,
      "learning_rate": 0.0002802294359968954,
      "loss": 0.5971,
      "step": 150
    },
    {
      "epoch": 0.18795888399412627,
      "grad_norm": 0.1571902334690094,
      "learning_rate": 0.0002773587469640702,
      "loss": 0.5937,
      "step": 160
    },
    {
      "epoch": 0.19970631424375918,
      "grad_norm": 0.11585285514593124,
      "learning_rate": 0.0002743103585037989,
      "loss": 0.6054,
      "step": 170
    },
    {
      "epoch": 0.21145374449339208,
      "grad_norm": 0.10303252190351486,
      "learning_rate": 0.0002710885239312008,
      "loss": 0.5708,
      "step": 180
    },
    {
      "epoch": 0.22320117474302498,
      "grad_norm": 0.09355439245700836,
      "learning_rate": 0.00026769773856499167,
      "loss": 0.5806,
      "step": 190
    },
    {
      "epoch": 0.23494860499265785,
      "grad_norm": 0.09288550913333893,
      "learning_rate": 0.0002641427334553158,
      "loss": 0.5747,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 851,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 40,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.2381453081706496e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}