File size: 2,046 Bytes
1c116dd 04fa761 aca2a90 1c116dd aca2a90 04fa761 1c116dd aca2a90 04fa761 aca2a90 04fa761 1c116dd aca2a90 04fa761 1c116dd aca2a90 04fa761 1c116dd aca2a90 04fa761 aca2a90 04fa761 1c116dd aca2a90 04fa761 1c116dd aca2a90 1c116dd aca2a90 1c116dd 04fa761 1c116dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
{
"best_metric": 6.200850486755371,
"best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained_last/de_mlm/de_childes_30/checkpoint-8000",
"epoch": 6.008261359369133,
"eval_steps": 2000,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.5020653398422832,
"eval_loss": 7.472109317779541,
"eval_runtime": 2.5707,
"eval_samples_per_second": 1291.498,
"eval_steps_per_second": 80.913,
"step": 2000
},
{
"epoch": 3.0041306796845664,
"grad_norm": 0.9529591798782349,
"learning_rate": 1e-05,
"loss": 7.3851,
"step": 4000
},
{
"epoch": 3.0041306796845664,
"eval_loss": 6.4175896644592285,
"eval_runtime": 2.5761,
"eval_samples_per_second": 1288.76,
"eval_steps_per_second": 80.742,
"step": 4000
},
{
"epoch": 4.50619601952685,
"eval_loss": 6.3026041984558105,
"eval_runtime": 2.5876,
"eval_samples_per_second": 1283.023,
"eval_steps_per_second": 80.382,
"step": 6000
},
{
"epoch": 6.008261359369133,
"grad_norm": 1.5569523572921753,
"learning_rate": 2e-05,
"loss": 6.0706,
"step": 8000
},
{
"epoch": 6.008261359369133,
"eval_loss": 6.200850486755371,
"eval_runtime": 2.6505,
"eval_samples_per_second": 1252.599,
"eval_steps_per_second": 78.476,
"step": 8000
}
],
"logging_steps": 4000,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 76,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2495025534468096.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|