adammandic87's picture
Training in progress, step 200, checkpoint
2b56eca verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5326231691078562,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002663115845539281,
"eval_loss": 11.090938568115234,
"eval_runtime": 1.5949,
"eval_samples_per_second": 99.696,
"eval_steps_per_second": 50.161,
"step": 1
},
{
"epoch": 0.02663115845539281,
"grad_norm": 0.8776910305023193,
"learning_rate": 0.0002,
"loss": 44.3478,
"step": 10
},
{
"epoch": 0.05326231691078562,
"grad_norm": 1.3439457416534424,
"learning_rate": 0.0002,
"loss": 44.3426,
"step": 20
},
{
"epoch": 0.07989347536617843,
"grad_norm": 1.0712538957595825,
"learning_rate": 0.0002,
"loss": 44.2427,
"step": 30
},
{
"epoch": 0.10652463382157124,
"grad_norm": 0.9012938141822815,
"learning_rate": 0.0002,
"loss": 44.2393,
"step": 40
},
{
"epoch": 0.13315579227696406,
"grad_norm": 0.9406182765960693,
"learning_rate": 0.0002,
"loss": 44.2016,
"step": 50
},
{
"epoch": 0.13315579227696406,
"eval_loss": 11.027718544006348,
"eval_runtime": 0.4329,
"eval_samples_per_second": 367.264,
"eval_steps_per_second": 184.787,
"step": 50
},
{
"epoch": 0.15978695073235685,
"grad_norm": 0.8746024966239929,
"learning_rate": 0.0002,
"loss": 44.0868,
"step": 60
},
{
"epoch": 0.18641810918774968,
"grad_norm": 0.9289256930351257,
"learning_rate": 0.0002,
"loss": 44.0484,
"step": 70
},
{
"epoch": 0.21304926764314247,
"grad_norm": 0.9994912147521973,
"learning_rate": 0.0002,
"loss": 43.9856,
"step": 80
},
{
"epoch": 0.2396804260985353,
"grad_norm": 0.9738786816596985,
"learning_rate": 0.0002,
"loss": 43.9872,
"step": 90
},
{
"epoch": 0.2663115845539281,
"grad_norm": 0.9224445819854736,
"learning_rate": 0.0002,
"loss": 43.9284,
"step": 100
},
{
"epoch": 0.2663115845539281,
"eval_loss": 10.973572731018066,
"eval_runtime": 0.4372,
"eval_samples_per_second": 363.671,
"eval_steps_per_second": 182.979,
"step": 100
},
{
"epoch": 0.2929427430093209,
"grad_norm": 1.1754047870635986,
"learning_rate": 0.0002,
"loss": 43.9785,
"step": 110
},
{
"epoch": 0.3195739014647137,
"grad_norm": 0.8138487935066223,
"learning_rate": 0.0002,
"loss": 43.9158,
"step": 120
},
{
"epoch": 0.34620505992010653,
"grad_norm": 0.9201197028160095,
"learning_rate": 0.0002,
"loss": 43.9227,
"step": 130
},
{
"epoch": 0.37283621837549935,
"grad_norm": 0.8453980088233948,
"learning_rate": 0.0002,
"loss": 43.9068,
"step": 140
},
{
"epoch": 0.3994673768308921,
"grad_norm": 0.8276954293251038,
"learning_rate": 0.0002,
"loss": 43.7968,
"step": 150
},
{
"epoch": 0.3994673768308921,
"eval_loss": 10.950996398925781,
"eval_runtime": 0.4461,
"eval_samples_per_second": 356.405,
"eval_steps_per_second": 179.323,
"step": 150
},
{
"epoch": 0.42609853528628494,
"grad_norm": 0.9496349096298218,
"learning_rate": 0.0002,
"loss": 43.7259,
"step": 160
},
{
"epoch": 0.45272969374167776,
"grad_norm": 0.8923008441925049,
"learning_rate": 0.0002,
"loss": 43.7386,
"step": 170
},
{
"epoch": 0.4793608521970706,
"grad_norm": 0.8389256596565247,
"learning_rate": 0.0002,
"loss": 43.809,
"step": 180
},
{
"epoch": 0.5059920106524634,
"grad_norm": 0.8299220204353333,
"learning_rate": 0.0002,
"loss": 43.865,
"step": 190
},
{
"epoch": 0.5326231691078562,
"grad_norm": 0.8370271325111389,
"learning_rate": 0.0002,
"loss": 43.8225,
"step": 200
},
{
"epoch": 0.5326231691078562,
"eval_loss": 10.938294410705566,
"eval_runtime": 0.4389,
"eval_samples_per_second": 362.31,
"eval_steps_per_second": 182.294,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 287637504000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}