{ "best_metric": 0.047215357422828674, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf_aragpt2-large/checkpoint-3344", "epoch": 7.0, "eval_steps": 500, "global_step": 11704, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.7535182237625122, "learning_rate": 4.822100789313904e-05, "loss": 0.2041, "step": 1672 }, { "epoch": 1.0, "eval_bleu": 0.04446881026937374, "eval_loss": 0.04873450845479965, "eval_rouge1": 0.3745425104768441, "eval_rouge2": 0.13019286214073578, "eval_rougeL": 0.37175423318625556, "eval_runtime": 280.7972, "eval_samples_per_second": 5.954, "eval_steps_per_second": 1.489, "step": 1672 }, { "epoch": 2.0, "grad_norm": 0.5986363887786865, "learning_rate": 4.568306010928962e-05, "loss": 0.0404, "step": 3344 }, { "epoch": 2.0, "eval_bleu": 0.06321503399346401, "eval_loss": 0.047215357422828674, "eval_rouge1": 0.40393751877732786, "eval_rouge2": 0.16331196085274022, "eval_rougeL": 0.4013347547558428, "eval_runtime": 280.6911, "eval_samples_per_second": 5.957, "eval_steps_per_second": 1.489, "step": 3344 }, { "epoch": 3.0, "grad_norm": 0.6692299246788025, "learning_rate": 4.3145112325440196e-05, "loss": 0.0301, "step": 5016 }, { "epoch": 3.0, "eval_bleu": 0.07634204530814317, "eval_loss": 0.047973889857530594, "eval_rouge1": 0.43387069380649074, "eval_rouge2": 0.20015750349405118, "eval_rougeL": 0.4322337100043073, "eval_runtime": 342.1899, "eval_samples_per_second": 4.886, "eval_steps_per_second": 1.222, "step": 5016 }, { "epoch": 4.0, "grad_norm": 0.39187440276145935, "learning_rate": 4.0607164541590774e-05, "loss": 0.0232, "step": 6688 }, { "epoch": 4.0, "eval_bleu": 0.08429412502024626, "eval_loss": 0.05150514841079712, "eval_rouge1": 0.45350332758712053, "eval_rouge2": 0.2191636179696206, "eval_rougeL": 0.45170908410677657, "eval_runtime": 220.2929, "eval_samples_per_second": 7.59, "eval_steps_per_second": 1.897, "step": 6688 }, { "epoch": 5.0, "grad_norm": 0.7217269539833069, "learning_rate": 3.806921675774135e-05, "loss": 0.0189, "step": 8360 }, { "epoch": 5.0, "eval_bleu": 0.08759878040919578, "eval_loss": 0.053812965750694275, "eval_rouge1": 0.4654488236955554, "eval_rouge2": 0.22994914252764548, "eval_rougeL": 0.46382599849734485, "eval_runtime": 342.2351, "eval_samples_per_second": 4.886, "eval_steps_per_second": 1.221, "step": 8360 }, { "epoch": 6.0, "grad_norm": 0.5110601186752319, "learning_rate": 3.553126897389193e-05, "loss": 0.0164, "step": 10032 }, { "epoch": 6.0, "eval_bleu": 0.09295393672158372, "eval_loss": 0.05724157765507698, "eval_rouge1": 0.46747472033375215, "eval_rouge2": 0.2370003193385344, "eval_rougeL": 0.46529496417190075, "eval_runtime": 342.1804, "eval_samples_per_second": 4.886, "eval_steps_per_second": 1.222, "step": 10032 }, { "epoch": 7.0, "grad_norm": 0.23091594874858856, "learning_rate": 3.2993321190042506e-05, "loss": 0.0148, "step": 11704 }, { "epoch": 7.0, "eval_bleu": 0.0918093965340221, "eval_loss": 0.05825383961200714, "eval_rouge1": 0.46559708964007684, "eval_rouge2": 0.2307789620256464, "eval_rougeL": 0.46361556118175484, "eval_runtime": 280.7351, "eval_samples_per_second": 5.956, "eval_steps_per_second": 1.489, "step": 11704 }, { "epoch": 7.0, "step": 11704, "total_flos": 2.03668247150592e+17, "train_loss": 0.04967995416767697, "train_runtime": 22443.4901, "train_samples_per_second": 5.957, "train_steps_per_second": 1.49 } ], "logging_steps": 500, "max_steps": 33440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.03668247150592e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }