tahamajs's picture
add files
10434af verified
{
"best_global_step": 2,
"best_metric": 3.8960189819335938,
"best_model_checkpoint": null,
"epoch": 0.1711229946524064,
"eval_steps": 2,
"global_step": 32,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0106951871657754,
"grad_norm": 0.0,
"learning_rate": 0.0008999841241675994,
"loss": 3.7831,
"mean_token_accuracy": 0.5624366104602814,
"num_tokens": 4369.0,
"step": 2
},
{
"epoch": 0.0106951871657754,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 4369.0,
"eval_runtime": 12.0177,
"eval_samples_per_second": 1.664,
"eval_steps_per_second": 0.166,
"step": 2
},
{
"epoch": 0.0213903743315508,
"grad_norm": 0.0,
"learning_rate": 0.0008998571242294379,
"loss": 3.6904,
"mean_token_accuracy": 0.5642752200365067,
"num_tokens": 8944.0,
"step": 4
},
{
"epoch": 0.0213903743315508,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 8944.0,
"eval_runtime": 12.1652,
"eval_samples_per_second": 1.644,
"eval_steps_per_second": 0.164,
"step": 4
},
{
"epoch": 0.03208556149732621,
"grad_norm": 0.0,
"learning_rate": 0.000899603160196567,
"loss": 3.8616,
"mean_token_accuracy": 0.5486461520195007,
"num_tokens": 13866.0,
"step": 6
},
{
"epoch": 0.03208556149732621,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 13866.0,
"eval_runtime": 12.331,
"eval_samples_per_second": 1.622,
"eval_steps_per_second": 0.162,
"step": 6
},
{
"epoch": 0.0427807486631016,
"grad_norm": 0.0,
"learning_rate": 0.0008992223037457738,
"loss": 3.8405,
"mean_token_accuracy": 0.5545942336320877,
"num_tokens": 18338.0,
"step": 8
},
{
"epoch": 0.0427807486631016,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 18338.0,
"eval_runtime": 12.2655,
"eval_samples_per_second": 1.631,
"eval_steps_per_second": 0.163,
"step": 8
},
{
"epoch": 0.053475935828877004,
"grad_norm": 0.0,
"learning_rate": 0.0008987146623669524,
"loss": 3.7968,
"mean_token_accuracy": 0.5595056265592575,
"num_tokens": 23083.0,
"step": 10
},
{
"epoch": 0.053475935828877004,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 23083.0,
"eval_runtime": 12.0373,
"eval_samples_per_second": 1.661,
"eval_steps_per_second": 0.166,
"step": 10
},
{
"epoch": 0.06417112299465241,
"grad_norm": 0.0,
"learning_rate": 0.0008980803793327655,
"loss": 3.7388,
"mean_token_accuracy": 0.557638630270958,
"num_tokens": 28012.0,
"step": 12
},
{
"epoch": 0.06417112299465241,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 28012.0,
"eval_runtime": 12.2491,
"eval_samples_per_second": 1.633,
"eval_steps_per_second": 0.163,
"step": 12
},
{
"epoch": 0.0748663101604278,
"grad_norm": 0.0,
"learning_rate": 0.0008973196336582091,
"loss": 3.7867,
"mean_token_accuracy": 0.5598014146089554,
"num_tokens": 32826.0,
"step": 14
},
{
"epoch": 0.0748663101604278,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 32826.0,
"eval_runtime": 12.3425,
"eval_samples_per_second": 1.62,
"eval_steps_per_second": 0.162,
"step": 14
},
{
"epoch": 0.0855614973262032,
"grad_norm": 0.0,
"learning_rate": 0.0008964326400500881,
"loss": 3.905,
"mean_token_accuracy": 0.5460383147001266,
"num_tokens": 37494.0,
"step": 16
},
{
"epoch": 0.0855614973262032,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 37494.0,
"eval_runtime": 12.2851,
"eval_samples_per_second": 1.628,
"eval_steps_per_second": 0.163,
"step": 16
},
{
"epoch": 0.0962566844919786,
"grad_norm": 0.0,
"learning_rate": 0.0008954196488464197,
"loss": 3.7338,
"mean_token_accuracy": 0.5698530972003937,
"num_tokens": 42166.0,
"step": 18
},
{
"epoch": 0.0962566844919786,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 42166.0,
"eval_runtime": 12.1294,
"eval_samples_per_second": 1.649,
"eval_steps_per_second": 0.165,
"step": 18
},
{
"epoch": 0.10695187165775401,
"grad_norm": 0.0,
"learning_rate": 0.0008942809459457797,
"loss": 3.7823,
"mean_token_accuracy": 0.5629921406507492,
"num_tokens": 46857.0,
"step": 20
},
{
"epoch": 0.10695187165775401,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 46857.0,
"eval_runtime": 12.2334,
"eval_samples_per_second": 1.635,
"eval_steps_per_second": 0.163,
"step": 20
},
{
"epoch": 0.11764705882352941,
"grad_norm": 0.0,
"learning_rate": 0.0008930168527266127,
"loss": 3.7864,
"mean_token_accuracy": 0.5499628484249115,
"num_tokens": 51942.0,
"step": 22
},
{
"epoch": 0.11764705882352941,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 51942.0,
"eval_runtime": 12.2575,
"eval_samples_per_second": 1.632,
"eval_steps_per_second": 0.163,
"step": 22
},
{
"epoch": 0.12834224598930483,
"grad_norm": 0.0,
"learning_rate": 0.0008916277259565292,
"loss": 3.6372,
"mean_token_accuracy": 0.5657302439212799,
"num_tokens": 56880.0,
"step": 24
},
{
"epoch": 0.12834224598930483,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 56880.0,
"eval_runtime": 12.28,
"eval_samples_per_second": 1.629,
"eval_steps_per_second": 0.163,
"step": 24
},
{
"epoch": 0.13903743315508021,
"grad_norm": 0.0,
"learning_rate": 0.0008901139576916137,
"loss": 3.788,
"mean_token_accuracy": 0.5580856949090958,
"num_tokens": 61710.0,
"step": 26
},
{
"epoch": 0.13903743315508021,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 61710.0,
"eval_runtime": 12.054,
"eval_samples_per_second": 1.659,
"eval_steps_per_second": 0.166,
"step": 26
},
{
"epoch": 0.1497326203208556,
"grad_norm": 0.0,
"learning_rate": 0.0008884759751657747,
"loss": 3.7585,
"mean_token_accuracy": 0.5520788580179214,
"num_tokens": 66437.0,
"step": 28
},
{
"epoch": 0.1497326203208556,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 66437.0,
"eval_runtime": 12.2403,
"eval_samples_per_second": 1.634,
"eval_steps_per_second": 0.163,
"step": 28
},
{
"epoch": 0.16042780748663102,
"grad_norm": 0.0,
"learning_rate": 0.0008867142406701649,
"loss": 3.7385,
"mean_token_accuracy": 0.5634956657886505,
"num_tokens": 71387.0,
"step": 30
},
{
"epoch": 0.16042780748663102,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 71387.0,
"eval_runtime": 12.2751,
"eval_samples_per_second": 1.629,
"eval_steps_per_second": 0.163,
"step": 30
},
{
"epoch": 0.1711229946524064,
"grad_norm": 0.0,
"learning_rate": 0.0008848292514227081,
"loss": 3.8708,
"mean_token_accuracy": 0.5420819222927094,
"num_tokens": 76271.0,
"step": 32
},
{
"epoch": 0.1711229946524064,
"eval_loss": 3.8960189819335938,
"eval_mean_token_accuracy": 0.55588099360466,
"eval_num_tokens": 76271.0,
"eval_runtime": 12.3155,
"eval_samples_per_second": 1.624,
"eval_steps_per_second": 0.162,
"step": 32
}
],
"logging_steps": 2,
"max_steps": 374,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 8,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2335294658641920.0,
"train_batch_size": 10,
"trial_name": null,
"trial_params": null
}