{
  "best_global_step": 2,
  "best_metric": 3.8960189819335938,
  "best_model_checkpoint": null,
  "epoch": 0.1711229946524064,
  "eval_steps": 2,
  "global_step": 32,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0106951871657754,
      "grad_norm": 0.0,
      "learning_rate": 0.0008999841241675994,
      "loss": 3.7831,
      "mean_token_accuracy": 0.5624366104602814,
      "num_tokens": 4369.0,
      "step": 2
    },
    {
      "epoch": 0.0106951871657754,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 4369.0,
      "eval_runtime": 12.0177,
      "eval_samples_per_second": 1.664,
      "eval_steps_per_second": 0.166,
      "step": 2
    },
    {
      "epoch": 0.0213903743315508,
      "grad_norm": 0.0,
      "learning_rate": 0.0008998571242294379,
      "loss": 3.6904,
      "mean_token_accuracy": 0.5642752200365067,
      "num_tokens": 8944.0,
      "step": 4
    },
    {
      "epoch": 0.0213903743315508,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 8944.0,
      "eval_runtime": 12.1652,
      "eval_samples_per_second": 1.644,
      "eval_steps_per_second": 0.164,
      "step": 4
    },
    {
      "epoch": 0.03208556149732621,
      "grad_norm": 0.0,
      "learning_rate": 0.000899603160196567,
      "loss": 3.8616,
      "mean_token_accuracy": 0.5486461520195007,
      "num_tokens": 13866.0,
      "step": 6
    },
    {
      "epoch": 0.03208556149732621,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 13866.0,
      "eval_runtime": 12.331,
      "eval_samples_per_second": 1.622,
      "eval_steps_per_second": 0.162,
      "step": 6
    },
    {
      "epoch": 0.0427807486631016,
      "grad_norm": 0.0,
      "learning_rate": 0.0008992223037457738,
      "loss": 3.8405,
      "mean_token_accuracy": 0.5545942336320877,
      "num_tokens": 18338.0,
      "step": 8
    },
    {
      "epoch": 0.0427807486631016,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 18338.0,
      "eval_runtime": 12.2655,
      "eval_samples_per_second": 1.631,
      "eval_steps_per_second": 0.163,
      "step": 8
    },
    {
      "epoch": 0.053475935828877004,
      "grad_norm": 0.0,
      "learning_rate": 0.0008987146623669524,
      "loss": 3.7968,
      "mean_token_accuracy": 0.5595056265592575,
      "num_tokens": 23083.0,
      "step": 10
    },
    {
      "epoch": 0.053475935828877004,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 23083.0,
      "eval_runtime": 12.0373,
      "eval_samples_per_second": 1.661,
      "eval_steps_per_second": 0.166,
      "step": 10
    },
    {
      "epoch": 0.06417112299465241,
      "grad_norm": 0.0,
      "learning_rate": 0.0008980803793327655,
      "loss": 3.7388,
      "mean_token_accuracy": 0.557638630270958,
      "num_tokens": 28012.0,
      "step": 12
    },
    {
      "epoch": 0.06417112299465241,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 28012.0,
      "eval_runtime": 12.2491,
      "eval_samples_per_second": 1.633,
      "eval_steps_per_second": 0.163,
      "step": 12
    },
    {
      "epoch": 0.0748663101604278,
      "grad_norm": 0.0,
      "learning_rate": 0.0008973196336582091,
      "loss": 3.7867,
      "mean_token_accuracy": 0.5598014146089554,
      "num_tokens": 32826.0,
      "step": 14
    },
    {
      "epoch": 0.0748663101604278,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 32826.0,
      "eval_runtime": 12.3425,
      "eval_samples_per_second": 1.62,
      "eval_steps_per_second": 0.162,
      "step": 14
    },
    {
      "epoch": 0.0855614973262032,
      "grad_norm": 0.0,
      "learning_rate": 0.0008964326400500881,
      "loss": 3.905,
      "mean_token_accuracy": 0.5460383147001266,
      "num_tokens": 37494.0,
      "step": 16
    },
    {
      "epoch": 0.0855614973262032,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 37494.0,
      "eval_runtime": 12.2851,
      "eval_samples_per_second": 1.628,
      "eval_steps_per_second": 0.163,
      "step": 16
    },
    {
      "epoch": 0.0962566844919786,
      "grad_norm": 0.0,
      "learning_rate": 0.0008954196488464197,
      "loss": 3.7338,
      "mean_token_accuracy": 0.5698530972003937,
      "num_tokens": 42166.0,
      "step": 18
    },
    {
      "epoch": 0.0962566844919786,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 42166.0,
      "eval_runtime": 12.1294,
      "eval_samples_per_second": 1.649,
      "eval_steps_per_second": 0.165,
      "step": 18
    },
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 0.0,
      "learning_rate": 0.0008942809459457797,
      "loss": 3.7823,
      "mean_token_accuracy": 0.5629921406507492,
      "num_tokens": 46857.0,
      "step": 20
    },
    {
      "epoch": 0.10695187165775401,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 46857.0,
      "eval_runtime": 12.2334,
      "eval_samples_per_second": 1.635,
      "eval_steps_per_second": 0.163,
      "step": 20
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 0.0,
      "learning_rate": 0.0008930168527266127,
      "loss": 3.7864,
      "mean_token_accuracy": 0.5499628484249115,
      "num_tokens": 51942.0,
      "step": 22
    },
    {
      "epoch": 0.11764705882352941,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 51942.0,
      "eval_runtime": 12.2575,
      "eval_samples_per_second": 1.632,
      "eval_steps_per_second": 0.163,
      "step": 22
    },
    {
      "epoch": 0.12834224598930483,
      "grad_norm": 0.0,
      "learning_rate": 0.0008916277259565292,
      "loss": 3.6372,
      "mean_token_accuracy": 0.5657302439212799,
      "num_tokens": 56880.0,
      "step": 24
    },
    {
      "epoch": 0.12834224598930483,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 56880.0,
      "eval_runtime": 12.28,
      "eval_samples_per_second": 1.629,
      "eval_steps_per_second": 0.163,
      "step": 24
    },
    {
      "epoch": 0.13903743315508021,
      "grad_norm": 0.0,
      "learning_rate": 0.0008901139576916137,
      "loss": 3.788,
      "mean_token_accuracy": 0.5580856949090958,
      "num_tokens": 61710.0,
      "step": 26
    },
    {
      "epoch": 0.13903743315508021,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 61710.0,
      "eval_runtime": 12.054,
      "eval_samples_per_second": 1.659,
      "eval_steps_per_second": 0.166,
      "step": 26
    },
    {
      "epoch": 0.1497326203208556,
      "grad_norm": 0.0,
      "learning_rate": 0.0008884759751657747,
      "loss": 3.7585,
      "mean_token_accuracy": 0.5520788580179214,
      "num_tokens": 66437.0,
      "step": 28
    },
    {
      "epoch": 0.1497326203208556,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 66437.0,
      "eval_runtime": 12.2403,
      "eval_samples_per_second": 1.634,
      "eval_steps_per_second": 0.163,
      "step": 28
    },
    {
      "epoch": 0.16042780748663102,
      "grad_norm": 0.0,
      "learning_rate": 0.0008867142406701649,
      "loss": 3.7385,
      "mean_token_accuracy": 0.5634956657886505,
      "num_tokens": 71387.0,
      "step": 30
    },
    {
      "epoch": 0.16042780748663102,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 71387.0,
      "eval_runtime": 12.2751,
      "eval_samples_per_second": 1.629,
      "eval_steps_per_second": 0.163,
      "step": 30
    },
    {
      "epoch": 0.1711229946524064,
      "grad_norm": 0.0,
      "learning_rate": 0.0008848292514227081,
      "loss": 3.8708,
      "mean_token_accuracy": 0.5420819222927094,
      "num_tokens": 76271.0,
      "step": 32
    },
    {
      "epoch": 0.1711229946524064,
      "eval_loss": 3.8960189819335938,
      "eval_mean_token_accuracy": 0.55588099360466,
      "eval_num_tokens": 76271.0,
      "eval_runtime": 12.3155,
      "eval_samples_per_second": 1.624,
      "eval_steps_per_second": 0.162,
      "step": 32
    }
  ],
"logging_steps": 2, |
|
"max_steps": 374, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 8, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2335294658641920.0, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|