vertings6's picture
Training in progress, step 30, checkpoint
7c997d3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0010169577708285663,
"eval_steps": 8,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.3898592360952214e-05,
"eval_loss": 1.5521913766860962,
"eval_runtime": 2912.8778,
"eval_samples_per_second": 4.264,
"eval_steps_per_second": 2.132,
"step": 1
},
{
"epoch": 0.00010169577708285663,
"grad_norm": 11.249643325805664,
"learning_rate": 3e-05,
"loss": 5.7115,
"step": 3
},
{
"epoch": 0.00020339155416571327,
"grad_norm": 7.744796276092529,
"learning_rate": 6e-05,
"loss": 6.0804,
"step": 6
},
{
"epoch": 0.0002711887388876177,
"eval_loss": 1.517215371131897,
"eval_runtime": 2926.1627,
"eval_samples_per_second": 4.245,
"eval_steps_per_second": 2.123,
"step": 8
},
{
"epoch": 0.0003050873312485699,
"grad_norm": 5.9153151512146,
"learning_rate": 9e-05,
"loss": 6.9199,
"step": 9
},
{
"epoch": 0.00040678310833142654,
"grad_norm": 86.80118560791016,
"learning_rate": 9.755282581475769e-05,
"loss": 5.86,
"step": 12
},
{
"epoch": 0.0005084788854142832,
"grad_norm": 1.8858240842819214,
"learning_rate": 8.535533905932738e-05,
"loss": 5.5372,
"step": 15
},
{
"epoch": 0.0005423774777752354,
"eval_loss": 1.3532363176345825,
"eval_runtime": 2923.6367,
"eval_samples_per_second": 4.248,
"eval_steps_per_second": 2.124,
"step": 16
},
{
"epoch": 0.0006101746624971398,
"grad_norm": 2.218549966812134,
"learning_rate": 6.545084971874738e-05,
"loss": 5.5828,
"step": 18
},
{
"epoch": 0.0007118704395799964,
"grad_norm": 1.8414465188980103,
"learning_rate": 4.2178276747988446e-05,
"loss": 5.3067,
"step": 21
},
{
"epoch": 0.0008135662166628531,
"grad_norm": 2.2933406829833984,
"learning_rate": 2.061073738537635e-05,
"loss": 5.2307,
"step": 24
},
{
"epoch": 0.0008135662166628531,
"eval_loss": 1.2488714456558228,
"eval_runtime": 2929.3994,
"eval_samples_per_second": 4.24,
"eval_steps_per_second": 2.12,
"step": 24
},
{
"epoch": 0.0009152619937457097,
"grad_norm": 2.308497190475464,
"learning_rate": 5.449673790581611e-06,
"loss": 5.3056,
"step": 27
},
{
"epoch": 0.0010169577708285663,
"grad_norm": 2.603907585144043,
"learning_rate": 0.0,
"loss": 4.5462,
"step": 30
}
],
"logging_steps": 3,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.042527481856e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}