|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8456659619450317, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.042283298097251586, |
|
"grad_norm": 1.2008394002914429, |
|
"learning_rate": 4.9975392245612254e-05, |
|
"loss": 4.1473, |
|
"num_input_tokens_seen": 68384, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08456659619450317, |
|
"grad_norm": 1.1565380096435547, |
|
"learning_rate": 4.9901617425775067e-05, |
|
"loss": 4.021, |
|
"num_input_tokens_seen": 128224, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12684989429175475, |
|
"grad_norm": 0.972896158695221, |
|
"learning_rate": 4.9778820775100185e-05, |
|
"loss": 3.8212, |
|
"num_input_tokens_seen": 197760, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.16913319238900634, |
|
"grad_norm": 1.0641047954559326, |
|
"learning_rate": 4.9607244033573156e-05, |
|
"loss": 3.7653, |
|
"num_input_tokens_seen": 262832, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21141649048625794, |
|
"grad_norm": 1.0994701385498047, |
|
"learning_rate": 4.93872249706591e-05, |
|
"loss": 3.7434, |
|
"num_input_tokens_seen": 333472, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2536997885835095, |
|
"grad_norm": 1.193864107131958, |
|
"learning_rate": 4.91191967203629e-05, |
|
"loss": 3.5488, |
|
"num_input_tokens_seen": 393616, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2959830866807611, |
|
"grad_norm": 1.0215297937393188, |
|
"learning_rate": 4.8803686928552736e-05, |
|
"loss": 3.5732, |
|
"num_input_tokens_seen": 458240, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3382663847780127, |
|
"grad_norm": 1.312198519706726, |
|
"learning_rate": 4.84413167142257e-05, |
|
"loss": 3.7108, |
|
"num_input_tokens_seen": 524576, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38054968287526425, |
|
"grad_norm": 1.3579617738723755, |
|
"learning_rate": 4.803279944676032e-05, |
|
"loss": 3.6871, |
|
"num_input_tokens_seen": 591856, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.42283298097251587, |
|
"grad_norm": 1.4524191617965698, |
|
"learning_rate": 4.7578939341563095e-05, |
|
"loss": 3.286, |
|
"num_input_tokens_seen": 655648, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.46511627906976744, |
|
"grad_norm": 1.248831033706665, |
|
"learning_rate": 4.70806298768736e-05, |
|
"loss": 3.5377, |
|
"num_input_tokens_seen": 721280, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.507399577167019, |
|
"grad_norm": 1.2427473068237305, |
|
"learning_rate": 4.653885203484515e-05, |
|
"loss": 3.56, |
|
"num_input_tokens_seen": 784688, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5496828752642706, |
|
"grad_norm": 1.323653221130371, |
|
"learning_rate": 4.595467237036329e-05, |
|
"loss": 3.4937, |
|
"num_input_tokens_seen": 850848, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5919661733615222, |
|
"grad_norm": 1.3730229139328003, |
|
"learning_rate": 4.532924091140417e-05, |
|
"loss": 3.3823, |
|
"num_input_tokens_seen": 912480, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6342494714587738, |
|
"grad_norm": 1.5328903198242188, |
|
"learning_rate": 4.466378889506607e-05, |
|
"loss": 3.3798, |
|
"num_input_tokens_seen": 978448, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6765327695560254, |
|
"grad_norm": 1.4153543710708618, |
|
"learning_rate": 4.395962634373097e-05, |
|
"loss": 3.3044, |
|
"num_input_tokens_seen": 1041280, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.718816067653277, |
|
"grad_norm": 1.6301600933074951, |
|
"learning_rate": 4.3218139486127854e-05, |
|
"loss": 3.3661, |
|
"num_input_tokens_seen": 1102224, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7610993657505285, |
|
"grad_norm": 1.6634522676467896, |
|
"learning_rate": 4.2440788028374624e-05, |
|
"loss": 3.3829, |
|
"num_input_tokens_seen": 1166576, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8033826638477801, |
|
"grad_norm": 1.4539167881011963, |
|
"learning_rate": 4.1629102280370904e-05, |
|
"loss": 3.2241, |
|
"num_input_tokens_seen": 1230096, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8456659619450317, |
|
"grad_norm": 1.453364372253418, |
|
"learning_rate": 4.0784680143198836e-05, |
|
"loss": 3.0931, |
|
"num_input_tokens_seen": 1297968, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 354, |
|
"num_input_tokens_seen": 1297968, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5457586224840704e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|