adammandic87's picture
Training in progress, step 200, checkpoint
e7ad34b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5131494547787043,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0025657472738935213,
"eval_loss": 1.6426724195480347,
"eval_runtime": 8.0379,
"eval_samples_per_second": 20.528,
"eval_steps_per_second": 10.326,
"step": 1
},
{
"epoch": 0.025657472738935216,
"grad_norm": 6.0922040939331055,
"learning_rate": 0.0002,
"loss": 5.4096,
"step": 10
},
{
"epoch": 0.05131494547787043,
"grad_norm": 4.473964214324951,
"learning_rate": 0.0002,
"loss": 4.5736,
"step": 20
},
{
"epoch": 0.07697241821680564,
"grad_norm": 5.79656457901001,
"learning_rate": 0.0002,
"loss": 4.236,
"step": 30
},
{
"epoch": 0.10262989095574086,
"grad_norm": 5.566458702087402,
"learning_rate": 0.0002,
"loss": 4.4715,
"step": 40
},
{
"epoch": 0.12828736369467608,
"grad_norm": 3.1581058502197266,
"learning_rate": 0.0002,
"loss": 4.2905,
"step": 50
},
{
"epoch": 0.12828736369467608,
"eval_loss": 0.9886534810066223,
"eval_runtime": 7.9175,
"eval_samples_per_second": 20.84,
"eval_steps_per_second": 10.483,
"step": 50
},
{
"epoch": 0.1539448364336113,
"grad_norm": 5.599130630493164,
"learning_rate": 0.0002,
"loss": 4.1246,
"step": 60
},
{
"epoch": 0.1796023091725465,
"grad_norm": 6.4759297370910645,
"learning_rate": 0.0002,
"loss": 4.1768,
"step": 70
},
{
"epoch": 0.20525978191148173,
"grad_norm": 3.1259078979492188,
"learning_rate": 0.0002,
"loss": 3.9254,
"step": 80
},
{
"epoch": 0.23091725465041693,
"grad_norm": 3.3770580291748047,
"learning_rate": 0.0002,
"loss": 4.1994,
"step": 90
},
{
"epoch": 0.25657472738935216,
"grad_norm": 3.869525194168091,
"learning_rate": 0.0002,
"loss": 4.144,
"step": 100
},
{
"epoch": 0.25657472738935216,
"eval_loss": 0.9652090072631836,
"eval_runtime": 7.9298,
"eval_samples_per_second": 20.807,
"eval_steps_per_second": 10.467,
"step": 100
},
{
"epoch": 0.28223220012828737,
"grad_norm": 5.021260738372803,
"learning_rate": 0.0002,
"loss": 4.0535,
"step": 110
},
{
"epoch": 0.3078896728672226,
"grad_norm": 3.946540355682373,
"learning_rate": 0.0002,
"loss": 4.2911,
"step": 120
},
{
"epoch": 0.3335471456061578,
"grad_norm": 5.100417137145996,
"learning_rate": 0.0002,
"loss": 4.1604,
"step": 130
},
{
"epoch": 0.359204618345093,
"grad_norm": 3.0885791778564453,
"learning_rate": 0.0002,
"loss": 3.846,
"step": 140
},
{
"epoch": 0.38486209108402825,
"grad_norm": 3.596226453781128,
"learning_rate": 0.0002,
"loss": 4.1906,
"step": 150
},
{
"epoch": 0.38486209108402825,
"eval_loss": 0.9527939558029175,
"eval_runtime": 7.9098,
"eval_samples_per_second": 20.86,
"eval_steps_per_second": 10.493,
"step": 150
},
{
"epoch": 0.41051956382296345,
"grad_norm": 3.3079276084899902,
"learning_rate": 0.0002,
"loss": 3.9429,
"step": 160
},
{
"epoch": 0.43617703656189866,
"grad_norm": 2.9778153896331787,
"learning_rate": 0.0002,
"loss": 4.0369,
"step": 170
},
{
"epoch": 0.46183450930083386,
"grad_norm": 3.6058528423309326,
"learning_rate": 0.0002,
"loss": 4.5255,
"step": 180
},
{
"epoch": 0.48749198203976907,
"grad_norm": 5.9715447425842285,
"learning_rate": 0.0002,
"loss": 3.9174,
"step": 190
},
{
"epoch": 0.5131494547787043,
"grad_norm": 4.206804275512695,
"learning_rate": 0.0002,
"loss": 3.9483,
"step": 200
},
{
"epoch": 0.5131494547787043,
"eval_loss": 0.9494011998176575,
"eval_runtime": 7.9027,
"eval_samples_per_second": 20.879,
"eval_steps_per_second": 10.503,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.110150213926912e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}