{ "best_metric": 1.4656140804290771, "best_model_checkpoint": "models/dehanalkautsar/mbert-3-with-multilingual-tokenizer-30k/checkpoint-24000", "epoch": 9.996235629260353, "eval_steps": 2000, "global_step": 24570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8139179977617255, "grad_norm": 18.654558181762695, "learning_rate": 9.185999185999187e-05, "loss": 4.0579, "step": 2000 }, { "epoch": 0.8139179977617255, "eval_loss": 2.5225632190704346, "eval_runtime": 73.2103, "eval_samples_per_second": 136.593, "eval_steps_per_second": 2.145, "step": 2000 }, { "epoch": 1.6275307762742903, "grad_norm": 15.436388969421387, "learning_rate": 8.371998371998372e-05, "loss": 2.3568, "step": 4000 }, { "epoch": 1.6275307762742903, "eval_loss": 2.068246603012085, "eval_runtime": 73.2939, "eval_samples_per_second": 136.437, "eval_steps_per_second": 2.142, "step": 4000 }, { "epoch": 2.4411435547868554, "grad_norm": 13.966560363769531, "learning_rate": 7.557997557997558e-05, "loss": 2.0445, "step": 6000 }, { "epoch": 2.4411435547868554, "eval_loss": 1.8596677780151367, "eval_runtime": 73.1178, "eval_samples_per_second": 136.766, "eval_steps_per_second": 2.147, "step": 6000 }, { "epoch": 3.25475633329942, "grad_norm": 14.865880012512207, "learning_rate": 6.743996743996744e-05, "loss": 1.8887, "step": 8000 }, { "epoch": 3.25475633329942, "eval_loss": 1.7477023601531982, "eval_runtime": 73.1164, "eval_samples_per_second": 136.768, "eval_steps_per_second": 2.147, "step": 8000 }, { "epoch": 4.068369111811985, "grad_norm": 13.934277534484863, "learning_rate": 5.929995929995931e-05, "loss": 1.7863, "step": 10000 }, { "epoch": 4.068369111811985, "eval_loss": 1.677494764328003, "eval_runtime": 73.1043, "eval_samples_per_second": 136.791, "eval_steps_per_second": 2.148, "step": 10000 }, { "epoch": 4.882287109573711, "grad_norm": 13.954177856445312, "learning_rate": 5.115995115995116e-05, "loss": 1.7168, "step": 12000 }, { "epoch": 4.882287109573711, "eval_loss": 1.6307039260864258, "eval_runtime": 73.09, "eval_samples_per_second": 136.818, "eval_steps_per_second": 2.148, "step": 12000 }, { "epoch": 5.6958998880862755, "grad_norm": 13.182450294494629, "learning_rate": 4.301994301994302e-05, "loss": 1.6626, "step": 14000 }, { "epoch": 5.6958998880862755, "eval_loss": 1.5682964324951172, "eval_runtime": 73.1095, "eval_samples_per_second": 136.781, "eval_steps_per_second": 2.147, "step": 14000 }, { "epoch": 6.50951266659884, "grad_norm": 13.514598846435547, "learning_rate": 3.487993487993488e-05, "loss": 1.6187, "step": 16000 }, { "epoch": 6.50951266659884, "eval_loss": 1.5308398008346558, "eval_runtime": 73.125, "eval_samples_per_second": 136.752, "eval_steps_per_second": 2.147, "step": 16000 }, { "epoch": 7.323125445111405, "grad_norm": 14.169295310974121, "learning_rate": 2.673992673992674e-05, "loss": 1.5873, "step": 18000 }, { "epoch": 7.323125445111405, "eval_loss": 1.5098525285720825, "eval_runtime": 73.107, "eval_samples_per_second": 136.786, "eval_steps_per_second": 2.148, "step": 18000 }, { "epoch": 8.13673822362397, "grad_norm": 13.590775489807129, "learning_rate": 1.85999185999186e-05, "loss": 1.5568, "step": 20000 }, { "epoch": 8.13673822362397, "eval_loss": 1.4912240505218506, "eval_runtime": 73.2463, "eval_samples_per_second": 136.526, "eval_steps_per_second": 2.143, "step": 20000 }, { "epoch": 8.950656221385696, "grad_norm": 13.897253036499023, "learning_rate": 1.045991045991046e-05, "loss": 1.5348, "step": 22000 }, { "epoch": 8.950656221385696, "eval_loss": 1.4697929620742798, "eval_runtime": 73.2504, "eval_samples_per_second": 136.518, "eval_steps_per_second": 2.143, "step": 22000 }, { "epoch": 9.76426899989826, "grad_norm": 13.867361068725586, "learning_rate": 2.31990231990232e-06, "loss": 1.5172, "step": 24000 }, { "epoch": 9.76426899989826, "eval_loss": 1.4656140804290771, "eval_runtime": 73.2214, "eval_samples_per_second": 136.572, "eval_steps_per_second": 2.144, "step": 24000 } ], "logging_steps": 2000, "max_steps": 24570, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6550302836575232e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }