|
{ |
|
"best_metric": 1.4656140804290771, |
|
"best_model_checkpoint": "models/dehanalkautsar/mbert-3-with-multilingual-tokenizer-30k/checkpoint-24000", |
|
"epoch": 9.996235629260353, |
|
"eval_steps": 2000, |
|
"global_step": 24570, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8139179977617255, |
|
"grad_norm": 18.654558181762695, |
|
"learning_rate": 9.185999185999187e-05, |
|
"loss": 4.0579, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8139179977617255, |
|
"eval_loss": 2.5225632190704346, |
|
"eval_runtime": 73.2103, |
|
"eval_samples_per_second": 136.593, |
|
"eval_steps_per_second": 2.145, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.6275307762742903, |
|
"grad_norm": 15.436388969421387, |
|
"learning_rate": 8.371998371998372e-05, |
|
"loss": 2.3568, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6275307762742903, |
|
"eval_loss": 2.068246603012085, |
|
"eval_runtime": 73.2939, |
|
"eval_samples_per_second": 136.437, |
|
"eval_steps_per_second": 2.142, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.4411435547868554, |
|
"grad_norm": 13.966560363769531, |
|
"learning_rate": 7.557997557997558e-05, |
|
"loss": 2.0445, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.4411435547868554, |
|
"eval_loss": 1.8596677780151367, |
|
"eval_runtime": 73.1178, |
|
"eval_samples_per_second": 136.766, |
|
"eval_steps_per_second": 2.147, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.25475633329942, |
|
"grad_norm": 14.865880012512207, |
|
"learning_rate": 6.743996743996744e-05, |
|
"loss": 1.8887, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.25475633329942, |
|
"eval_loss": 1.7477023601531982, |
|
"eval_runtime": 73.1164, |
|
"eval_samples_per_second": 136.768, |
|
"eval_steps_per_second": 2.147, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.068369111811985, |
|
"grad_norm": 13.934277534484863, |
|
"learning_rate": 5.929995929995931e-05, |
|
"loss": 1.7863, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.068369111811985, |
|
"eval_loss": 1.677494764328003, |
|
"eval_runtime": 73.1043, |
|
"eval_samples_per_second": 136.791, |
|
"eval_steps_per_second": 2.148, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.882287109573711, |
|
"grad_norm": 13.954177856445312, |
|
"learning_rate": 5.115995115995116e-05, |
|
"loss": 1.7168, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.882287109573711, |
|
"eval_loss": 1.6307039260864258, |
|
"eval_runtime": 73.09, |
|
"eval_samples_per_second": 136.818, |
|
"eval_steps_per_second": 2.148, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.6958998880862755, |
|
"grad_norm": 13.182450294494629, |
|
"learning_rate": 4.301994301994302e-05, |
|
"loss": 1.6626, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.6958998880862755, |
|
"eval_loss": 1.5682964324951172, |
|
"eval_runtime": 73.1095, |
|
"eval_samples_per_second": 136.781, |
|
"eval_steps_per_second": 2.147, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.50951266659884, |
|
"grad_norm": 13.514598846435547, |
|
"learning_rate": 3.487993487993488e-05, |
|
"loss": 1.6187, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.50951266659884, |
|
"eval_loss": 1.5308398008346558, |
|
"eval_runtime": 73.125, |
|
"eval_samples_per_second": 136.752, |
|
"eval_steps_per_second": 2.147, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.323125445111405, |
|
"grad_norm": 14.169295310974121, |
|
"learning_rate": 2.673992673992674e-05, |
|
"loss": 1.5873, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.323125445111405, |
|
"eval_loss": 1.5098525285720825, |
|
"eval_runtime": 73.107, |
|
"eval_samples_per_second": 136.786, |
|
"eval_steps_per_second": 2.148, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.13673822362397, |
|
"grad_norm": 13.590775489807129, |
|
"learning_rate": 1.85999185999186e-05, |
|
"loss": 1.5568, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.13673822362397, |
|
"eval_loss": 1.4912240505218506, |
|
"eval_runtime": 73.2463, |
|
"eval_samples_per_second": 136.526, |
|
"eval_steps_per_second": 2.143, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.950656221385696, |
|
"grad_norm": 13.897253036499023, |
|
"learning_rate": 1.045991045991046e-05, |
|
"loss": 1.5348, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.950656221385696, |
|
"eval_loss": 1.4697929620742798, |
|
"eval_runtime": 73.2504, |
|
"eval_samples_per_second": 136.518, |
|
"eval_steps_per_second": 2.143, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.76426899989826, |
|
"grad_norm": 13.867361068725586, |
|
"learning_rate": 2.31990231990232e-06, |
|
"loss": 1.5172, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.76426899989826, |
|
"eval_loss": 1.4656140804290771, |
|
"eval_runtime": 73.2214, |
|
"eval_samples_per_second": 136.572, |
|
"eval_steps_per_second": 2.144, |
|
"step": 24000 |
|
} |
|
], |
|
"logging_steps": 2000, |
|
"max_steps": 24570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6550302836575232e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|