Tonic's picture
Upload folder using huggingface_hub
82c241c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 63,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 1.6024624109268188,
"learning_rate": 0.000199522988805313,
"loss": 2.0529,
"mean_token_accuracy": 0.559639161452651,
"num_tokens": 64985.0,
"step": 5
},
{
"epoch": 0.16,
"grad_norm": 1.0667455196380615,
"learning_rate": 0.00019421451371364444,
"loss": 1.5192,
"mean_token_accuracy": 0.6313199333846569,
"num_tokens": 132654.0,
"step": 10
},
{
"epoch": 0.24,
"grad_norm": 0.6989722847938538,
"learning_rate": 0.00018335255037651302,
"loss": 1.2651,
"mean_token_accuracy": 0.66771609634161,
"num_tokens": 200863.0,
"step": 15
},
{
"epoch": 0.32,
"grad_norm": 0.49305838346481323,
"learning_rate": 0.0001676533860828358,
"loss": 1.1965,
"mean_token_accuracy": 0.6907071858644486,
"num_tokens": 265471.0,
"step": 20
},
{
"epoch": 0.4,
"grad_norm": 0.3976356089115143,
"learning_rate": 0.0001481522951638875,
"loss": 1.1184,
"mean_token_accuracy": 0.7011254012584687,
"num_tokens": 335362.0,
"step": 25
},
{
"epoch": 0.48,
"grad_norm": 0.32982659339904785,
"learning_rate": 0.00012613526829296622,
"loss": 1.0825,
"mean_token_accuracy": 0.7074193194508552,
"num_tokens": 403159.0,
"step": 30
},
{
"epoch": 0.56,
"grad_norm": 0.3139457404613495,
"learning_rate": 0.00010305420840860182,
"loss": 1.1136,
"mean_token_accuracy": 0.7029193982481956,
"num_tokens": 465862.0,
"step": 35
},
{
"epoch": 0.64,
"grad_norm": 0.3034254312515259,
"learning_rate": 8.043118562802488e-05,
"loss": 1.0445,
"mean_token_accuracy": 0.7117198631167412,
"num_tokens": 535415.0,
"step": 40
},
{
"epoch": 0.72,
"grad_norm": 0.3687556982040405,
"learning_rate": 5.9758065017582185e-05,
"loss": 1.0445,
"mean_token_accuracy": 0.7163990259170532,
"num_tokens": 600630.0,
"step": 45
},
{
"epoch": 0.8,
"grad_norm": 0.2981916069984436,
"learning_rate": 4.2398126221653236e-05,
"loss": 1.0519,
"mean_token_accuracy": 0.7169259950518608,
"num_tokens": 669353.0,
"step": 50
},
{
"epoch": 0.88,
"grad_norm": 0.324988454580307,
"learning_rate": 2.949616259940842e-05,
"loss": 1.0491,
"mean_token_accuracy": 0.7140479609370232,
"num_tokens": 735759.0,
"step": 55
},
{
"epoch": 0.96,
"grad_norm": 0.31940364837646484,
"learning_rate": 2.19029883414178e-05,
"loss": 1.0886,
"mean_token_accuracy": 0.7087796837091446,
"num_tokens": 799409.0,
"step": 60
}
],
"logging_steps": 5,
"max_steps": 63,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0164973639893696e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}