File size: 1,547 Bytes
3e02bbf 3ef6ac1 3e02bbf 8cf4366 3e02bbf 3ef6ac1 8cf4366 3ef6ac1 3e02bbf 3ef6ac1 8cf4366 3ef6ac1 3e02bbf 3ef6ac1 8cf4366 3ef6ac1 3e02bbf 3ef6ac1 8cf4366 3ef6ac1 3e02bbf 3ef6ac1 3e02bbf 3ef6ac1 8cf4366 3ef6ac1 3e02bbf 8cf4366 3e02bbf 3ef6ac1 3e02bbf 3ef6ac1 3e02bbf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.92,
"eval_steps": 500,
"global_step": 18,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 0.0001,
"loss": 2.0929,
"step": 1
},
{
"epoch": 0.53,
"learning_rate": 0.00018314696123025454,
"loss": 1.5756,
"step": 5
},
{
"epoch": 0.96,
"eval_loss": 0.2955792546272278,
"eval_runtime": 7.0548,
"eval_samples_per_second": 14.175,
"eval_steps_per_second": 1.843,
"step": 9
},
{
"epoch": 1.07,
"learning_rate": 0.0001,
"loss": 0.4344,
"step": 10
},
{
"epoch": 1.6,
"learning_rate": 1.6853038769745467e-05,
"loss": 0.2474,
"step": 15
},
{
"epoch": 1.92,
"eval_loss": 0.24160662293434143,
"eval_runtime": 6.0397,
"eval_samples_per_second": 16.557,
"eval_steps_per_second": 2.152,
"step": 18
},
{
"epoch": 1.92,
"step": 18,
"total_flos": 19174999326720.0,
"train_loss": 0.692977637052536,
"train_runtime": 275.1049,
"train_samples_per_second": 2.181,
"train_steps_per_second": 0.065
}
],
"logging_steps": 5,
"max_steps": 18,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 19174999326720.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|