|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.87956643918105, |
|
"eval_steps": 3110, |
|
"global_step": 31100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.987956643918105, |
|
"grad_norm": 0.008763309568166733, |
|
"learning_rate": 9.002572347266882e-06, |
|
"loss": 2.156, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 9.987956643918105, |
|
"eval_accuracy": 0.04915439834529748, |
|
"eval_loss": 2.9957287311553955, |
|
"eval_runtime": 36.4037, |
|
"eval_samples_per_second": 225.774, |
|
"eval_steps_per_second": 11.29, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 19.97591328783621, |
|
"grad_norm": 1.9511775970458984, |
|
"learning_rate": 8.002572347266881e-06, |
|
"loss": 2.1411, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 19.97591328783621, |
|
"eval_accuracy": 0.05371699720160603, |
|
"eval_loss": 3.0536019802093506, |
|
"eval_runtime": 36.6549, |
|
"eval_samples_per_second": 224.227, |
|
"eval_steps_per_second": 11.213, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 29.963869931754317, |
|
"grad_norm": 2.7666614055633545, |
|
"learning_rate": 7.002572347266882e-06, |
|
"loss": 1.9888, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 29.963869931754317, |
|
"eval_accuracy": 0.05665733868678266, |
|
"eval_loss": 3.3323957920074463, |
|
"eval_runtime": 36.5927, |
|
"eval_samples_per_second": 224.607, |
|
"eval_steps_per_second": 11.232, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 39.95182657567242, |
|
"grad_norm": 3.709224224090576, |
|
"learning_rate": 6.002572347266882e-06, |
|
"loss": 1.8759, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 39.95182657567242, |
|
"eval_accuracy": 0.05706290302956564, |
|
"eval_loss": 3.6092300415039062, |
|
"eval_runtime": 36.6501, |
|
"eval_samples_per_second": 224.256, |
|
"eval_steps_per_second": 11.214, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 49.939783219590524, |
|
"grad_norm": 3.455683708190918, |
|
"learning_rate": 5.002893890675241e-06, |
|
"loss": 1.8129, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 49.939783219590524, |
|
"eval_accuracy": 0.05752524638033824, |
|
"eval_loss": 3.809110641479492, |
|
"eval_runtime": 36.6247, |
|
"eval_samples_per_second": 224.411, |
|
"eval_steps_per_second": 11.222, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 59.927739863508634, |
|
"grad_norm": 3.957350254058838, |
|
"learning_rate": 4.003536977491962e-06, |
|
"loss": 1.7708, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 59.927739863508634, |
|
"eval_accuracy": 0.05777264062943586, |
|
"eval_loss": 3.9897844791412354, |
|
"eval_runtime": 36.4616, |
|
"eval_samples_per_second": 225.415, |
|
"eval_steps_per_second": 11.272, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 69.91569650742673, |
|
"grad_norm": 4.713634014129639, |
|
"learning_rate": 3.003858520900322e-06, |
|
"loss": 1.7413, |
|
"step": 21770 |
|
}, |
|
{ |
|
"epoch": 69.91569650742673, |
|
"eval_accuracy": 0.057914588149409904, |
|
"eval_loss": 4.2735114097595215, |
|
"eval_runtime": 36.4969, |
|
"eval_samples_per_second": 225.197, |
|
"eval_steps_per_second": 11.261, |
|
"step": 21770 |
|
}, |
|
{ |
|
"epoch": 79.90365315134484, |
|
"grad_norm": 3.9962944984436035, |
|
"learning_rate": 2.0045016077170422e-06, |
|
"loss": 1.7172, |
|
"step": 24880 |
|
}, |
|
{ |
|
"epoch": 79.90365315134484, |
|
"eval_accuracy": 0.0580362574522448, |
|
"eval_loss": 4.343382835388184, |
|
"eval_runtime": 36.6256, |
|
"eval_samples_per_second": 224.406, |
|
"eval_steps_per_second": 11.222, |
|
"step": 24880 |
|
}, |
|
{ |
|
"epoch": 89.89160979526295, |
|
"grad_norm": 2.9578592777252197, |
|
"learning_rate": 1.0048231511254019e-06, |
|
"loss": 1.7056, |
|
"step": 27990 |
|
}, |
|
{ |
|
"epoch": 89.89160979526295, |
|
"eval_accuracy": 0.058063295075097, |
|
"eval_loss": 4.511988162994385, |
|
"eval_runtime": 36.4648, |
|
"eval_samples_per_second": 225.395, |
|
"eval_steps_per_second": 11.271, |
|
"step": 27990 |
|
}, |
|
{ |
|
"epoch": 99.87956643918105, |
|
"grad_norm": 3.108112335205078, |
|
"learning_rate": 5.466237942122187e-09, |
|
"loss": 1.7018, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 99.87956643918105, |
|
"eval_accuracy": 0.05819442754593016, |
|
"eval_loss": 4.51352071762085, |
|
"eval_runtime": 36.5052, |
|
"eval_samples_per_second": 225.146, |
|
"eval_steps_per_second": 11.259, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 99.87956643918105, |
|
"step": 31100, |
|
"total_flos": 1.4711495868588088e+18, |
|
"train_loss": 1.861133707420619, |
|
"train_runtime": 58944.542, |
|
"train_samples_per_second": 105.616, |
|
"train_steps_per_second": 0.528 |
|
} |
|
], |
|
"logging_steps": 3110, |
|
"max_steps": 31100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4711495868588088e+18, |
|
"train_batch_size": 25, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|