{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.8421052631578947,
"eval_steps": 500,
"global_step": 42,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07017543859649122,
"grad_norm": 10.52865219116211,
"learning_rate": 0.0,
"loss": 5.0782,
"step": 1
},
{
"epoch": 0.14035087719298245,
"grad_norm": 10.9165620803833,
"learning_rate": 2.0000000000000003e-06,
"loss": 5.1577,
"step": 2
},
{
"epoch": 0.21052631578947367,
"grad_norm": 11.1675443649292,
"learning_rate": 4.000000000000001e-06,
"loss": 5.1071,
"step": 3
},
{
"epoch": 0.2807017543859649,
"grad_norm": 10.448240280151367,
"learning_rate": 6e-06,
"loss": 5.1306,
"step": 4
},
{
"epoch": 0.3508771929824561,
"grad_norm": 10.703569412231445,
"learning_rate": 8.000000000000001e-06,
"loss": 5.0578,
"step": 5
},
{
"epoch": 0.42105263157894735,
"grad_norm": 10.621328353881836,
"learning_rate": 1e-05,
"loss": 4.9643,
"step": 6
},
{
"epoch": 0.49122807017543857,
"grad_norm": 10.412773132324219,
"learning_rate": 1.2e-05,
"loss": 4.8105,
"step": 7
},
{
"epoch": 0.5614035087719298,
"grad_norm": 10.143810272216797,
"learning_rate": 1.4000000000000001e-05,
"loss": 4.646,
"step": 8
},
{
"epoch": 0.631578947368421,
"grad_norm": 8.884021759033203,
"learning_rate": 1.6000000000000003e-05,
"loss": 4.478,
"step": 9
},
{
"epoch": 0.7017543859649122,
"grad_norm": 7.186990261077881,
"learning_rate": 1.8e-05,
"loss": 4.2099,
"step": 10
},
{
"epoch": 0.7719298245614035,
"grad_norm": 5.728164196014404,
"learning_rate": 2e-05,
"loss": 4.0316,
"step": 11
},
{
"epoch": 0.8421052631578947,
"grad_norm": 7.005011081695557,
"learning_rate": 2.2000000000000003e-05,
"loss": 3.9128,
"step": 12
},
{
"epoch": 0.9122807017543859,
"grad_norm": 8.047316551208496,
"learning_rate": 2.4e-05,
"loss": 3.7518,
"step": 13
},
{
"epoch": 0.9824561403508771,
"grad_norm": 6.397008895874023,
"learning_rate": 2.6000000000000002e-05,
"loss": 3.5923,
"step": 14
},
{
"epoch": 1.0,
"grad_norm": 4.87739372253418,
"learning_rate": 2.8000000000000003e-05,
"loss": 3.3723,
"step": 15
},
{
"epoch": 1.0701754385964912,
"grad_norm": 3.8167903423309326,
"learning_rate": 3e-05,
"loss": 3.3214,
"step": 16
},
{
"epoch": 1.1403508771929824,
"grad_norm": 3.506105899810791,
"learning_rate": 3.2000000000000005e-05,
"loss": 3.2114,
"step": 17
},
{
"epoch": 1.2105263157894737,
"grad_norm": 2.6847188472747803,
"learning_rate": 3.4000000000000007e-05,
"loss": 3.1914,
"step": 18
},
{
"epoch": 1.280701754385965,
"grad_norm": 2.0849263668060303,
"learning_rate": 3.6e-05,
"loss": 3.1213,
"step": 19
},
{
"epoch": 1.3508771929824561,
"grad_norm": 1.9912149906158447,
"learning_rate": 3.8e-05,
"loss": 3.0372,
"step": 20
},
{
"epoch": 1.4210526315789473,
"grad_norm": 1.7372136116027832,
"learning_rate": 4e-05,
"loss": 2.948,
"step": 21
},
{
"epoch": 1.4912280701754386,
"grad_norm": 1.7806267738342285,
"learning_rate": 4.2e-05,
"loss": 2.9269,
"step": 22
},
{
"epoch": 1.5614035087719298,
"grad_norm": 1.5322002172470093,
"learning_rate": 4.4000000000000006e-05,
"loss": 2.8877,
"step": 23
},
{
"epoch": 1.631578947368421,
"grad_norm": 1.6473444700241089,
"learning_rate": 4.600000000000001e-05,
"loss": 2.7834,
"step": 24
},
{
"epoch": 1.7017543859649122,
"grad_norm": 1.4832264184951782,
"learning_rate": 4.8e-05,
"loss": 2.7808,
"step": 25
},
{
"epoch": 1.7719298245614035,
"grad_norm": 1.4326348304748535,
"learning_rate": 5e-05,
"loss": 2.6874,
"step": 26
},
{
"epoch": 1.8421052631578947,
"grad_norm": 2.185389280319214,
"learning_rate": 5.2000000000000004e-05,
"loss": 2.667,
"step": 27
},
{
"epoch": 1.912280701754386,
"grad_norm": 4.019981384277344,
"learning_rate": 5.4000000000000005e-05,
"loss": 2.6309,
"step": 28
},
{
"epoch": 1.9824561403508771,
"grad_norm": 1.368363618850708,
"learning_rate": 5.6000000000000006e-05,
"loss": 2.5584,
"step": 29
},
{
"epoch": 2.0,
"grad_norm": 3.5658273696899414,
"learning_rate": 5.8e-05,
"loss": 2.4491,
"step": 30
},
{
"epoch": 2.0701754385964914,
"grad_norm": 2.8816487789154053,
"learning_rate": 6e-05,
"loss": 2.4853,
"step": 31
},
{
"epoch": 2.1403508771929824,
"grad_norm": 4.591163158416748,
"learning_rate": 6.2e-05,
"loss": 2.4324,
"step": 32
},
{
"epoch": 2.2105263157894735,
"grad_norm": 4.660356044769287,
"learning_rate": 6.400000000000001e-05,
"loss": 2.3511,
"step": 33
},
{
"epoch": 2.280701754385965,
"grad_norm": 2.6735446453094482,
"learning_rate": 6.6e-05,
"loss": 2.3241,
"step": 34
},
{
"epoch": 2.3508771929824563,
"grad_norm": 6.208858966827393,
"learning_rate": 6.800000000000001e-05,
"loss": 2.291,
"step": 35
},
{
"epoch": 2.4210526315789473,
"grad_norm": 3.5636632442474365,
"learning_rate": 7e-05,
"loss": 2.2498,
"step": 36
},
{
"epoch": 2.4912280701754383,
"grad_norm": 3.1954169273376465,
"learning_rate": 7.2e-05,
"loss": 2.23,
"step": 37
},
{
"epoch": 2.56140350877193,
"grad_norm": 3.8919479846954346,
"learning_rate": 7.4e-05,
"loss": 2.2094,
"step": 38
},
{
"epoch": 2.6315789473684212,
"grad_norm": 2.745952606201172,
"learning_rate": 7.6e-05,
"loss": 2.1835,
"step": 39
},
{
"epoch": 2.7017543859649122,
"grad_norm": 2.4738833904266357,
"learning_rate": 7.800000000000001e-05,
"loss": 2.1099,
"step": 40
},
{
"epoch": 2.7719298245614032,
"grad_norm": 5.345929145812988,
"learning_rate": 8e-05,
"loss": 2.0947,
"step": 41
},
{
"epoch": 2.8421052631578947,
"grad_norm": 2.14803147315979,
"learning_rate": 8.2e-05,
"loss": 2.0623,
"step": 42
}
],
"logging_steps": 1,
"max_steps": 42,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.301010223949414e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}