{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.827521206409048, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 5e-06, "loss": 0.8357, "step": 200 }, { "epoch": 0.09, "learning_rate": 1e-05, "loss": 0.6021, "step": 400 }, { "epoch": 0.14, "learning_rate": 1.5e-05, "loss": 0.216, "step": 600 }, { "epoch": 0.19, "learning_rate": 2e-05, "loss": 0.1557, "step": 800 }, { "epoch": 0.24, "learning_rate": 2.5e-05, "loss": 0.1325, "step": 1000 }, { "epoch": 0.24, "eval_bleu": 8.163188643432274, "eval_chrf": 9.012540691377044, "eval_loss": 0.21054820716381073, "eval_runtime": 641.8265, "eval_samples_per_second": 2.493, "eval_steps_per_second": 0.078, "step": 1000 }, { "epoch": 0.28, "learning_rate": 3e-05, "loss": 0.1126, "step": 1200 }, { "epoch": 0.33, "learning_rate": 3.5e-05, "loss": 0.1059, "step": 1400 }, { "epoch": 0.38, "learning_rate": 4e-05, "loss": 0.096, "step": 1600 }, { "epoch": 0.42, "learning_rate": 4.5e-05, "loss": 0.0972, "step": 1800 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 0.0895, "step": 2000 }, { "epoch": 0.47, "eval_bleu": 12.747283674419554, "eval_chrf": 14.975219232972814, "eval_loss": 0.2011401355266571, "eval_runtime": 899.369, "eval_samples_per_second": 1.779, "eval_steps_per_second": 0.056, "step": 2000 }, { "epoch": 0.52, "learning_rate": 4.906820723071189e-05, "loss": 0.0782, "step": 2200 }, { "epoch": 0.57, "learning_rate": 4.813641446142379e-05, "loss": 0.0775, "step": 2400 }, { "epoch": 0.61, "learning_rate": 4.720462169213567e-05, "loss": 0.0754, "step": 2600 }, { "epoch": 0.66, "learning_rate": 4.627282892284756e-05, "loss": 0.0698, "step": 2800 }, { "epoch": 0.71, "learning_rate": 4.5341036153559454e-05, "loss": 0.0682, "step": 3000 }, { "epoch": 0.71, "eval_bleu": 6.876683844974673, "eval_chrf": 14.013775496967607, "eval_loss": 0.2018859088420868, "eval_runtime": 978.3085, "eval_samples_per_second": 1.635, "eval_steps_per_second": 0.051, "step": 3000 }, { "epoch": 0.75, "learning_rate": 4.440924338427134e-05, "loss": 0.0633, "step": 3200 }, { "epoch": 0.8, "learning_rate": 4.347745061498323e-05, "loss": 0.0625, "step": 3400 }, { "epoch": 0.85, "learning_rate": 4.254565784569512e-05, "loss": 0.0627, "step": 3600 }, { "epoch": 0.9, "learning_rate": 4.161386507640701e-05, "loss": 0.0595, "step": 3800 }, { "epoch": 0.94, "learning_rate": 4.06820723071189e-05, "loss": 0.0591, "step": 4000 }, { "epoch": 0.94, "eval_bleu": 28.04277575880573, "eval_chrf": 24.223488772329105, "eval_loss": 0.19976669549942017, "eval_runtime": 250.605, "eval_samples_per_second": 6.385, "eval_steps_per_second": 0.2, "step": 4000 }, { "epoch": 0.99, "learning_rate": 3.975027953783079e-05, "loss": 0.0583, "step": 4200 }, { "epoch": 1.04, "learning_rate": 3.8818486768542676e-05, "loss": 0.0528, "step": 4400 }, { "epoch": 1.08, "learning_rate": 3.788669399925457e-05, "loss": 0.0511, "step": 4600 }, { "epoch": 1.13, "learning_rate": 3.695490122996646e-05, "loss": 0.0512, "step": 4800 }, { "epoch": 1.18, "learning_rate": 3.602310846067834e-05, "loss": 0.0482, "step": 5000 }, { "epoch": 1.18, "eval_bleu": 28.9415072135438, "eval_chrf": 24.984505458577555, "eval_loss": 0.19697046279907227, "eval_runtime": 264.9755, "eval_samples_per_second": 6.038, "eval_steps_per_second": 0.189, "step": 5000 }, { "epoch": 1.23, "learning_rate": 3.509131569139024e-05, "loss": 0.0477, "step": 5200 }, { "epoch": 1.27, "learning_rate": 3.415952292210213e-05, "loss": 0.0475, "step": 5400 }, { "epoch": 1.32, "learning_rate": 3.3227730152814016e-05, "loss": 0.0477, "step": 5600 }, { "epoch": 1.37, "learning_rate": 3.2295937383525905e-05, "loss": 0.0471, "step": 5800 }, { "epoch": 1.41, "learning_rate": 3.1364144614237794e-05, "loss": 0.0461, "step": 6000 }, { "epoch": 1.41, "eval_bleu": 29.631223435519598, "eval_chrf": 25.61588898393464, "eval_loss": 0.19584685564041138, "eval_runtime": 306.0261, "eval_samples_per_second": 5.228, "eval_steps_per_second": 0.163, "step": 6000 }, { "epoch": 1.46, "learning_rate": 3.0432351844949686e-05, "loss": 0.0465, "step": 6200 }, { "epoch": 1.51, "learning_rate": 2.950055907566158e-05, "loss": 0.0458, "step": 6400 }, { "epoch": 1.56, "learning_rate": 2.8568766306373464e-05, "loss": 0.0433, "step": 6600 }, { "epoch": 1.6, "learning_rate": 2.7636973537085353e-05, "loss": 0.045, "step": 6800 }, { "epoch": 1.65, "learning_rate": 2.6705180767797245e-05, "loss": 0.044, "step": 7000 }, { "epoch": 1.65, "eval_bleu": 29.440869914401336, "eval_chrf": 25.503480748268714, "eval_loss": 0.201187402009964, "eval_runtime": 327.7594, "eval_samples_per_second": 4.882, "eval_steps_per_second": 0.153, "step": 7000 }, { "epoch": 1.7, "learning_rate": 2.5773387998509134e-05, "loss": 0.0434, "step": 7200 }, { "epoch": 1.74, "learning_rate": 2.4841595229221023e-05, "loss": 0.0431, "step": 7400 }, { "epoch": 1.79, "learning_rate": 2.3909802459932912e-05, "loss": 0.0429, "step": 7600 }, { "epoch": 1.84, "learning_rate": 2.2978009690644804e-05, "loss": 0.0411, "step": 7800 }, { "epoch": 1.89, "learning_rate": 2.204621692135669e-05, "loss": 0.0397, "step": 8000 }, { "epoch": 1.89, "eval_bleu": 29.715590270005077, "eval_chrf": 25.651817457402736, "eval_loss": 0.19735974073410034, "eval_runtime": 382.7369, "eval_samples_per_second": 4.18, "eval_steps_per_second": 0.131, "step": 8000 }, { "epoch": 1.93, "learning_rate": 2.1114424152068582e-05, "loss": 0.0416, "step": 8200 }, { "epoch": 1.98, "learning_rate": 2.018263138278047e-05, "loss": 0.0404, "step": 8400 }, { "epoch": 2.03, "learning_rate": 1.925083861349236e-05, "loss": 0.0398, "step": 8600 }, { "epoch": 2.07, "learning_rate": 1.831904584420425e-05, "loss": 0.0371, "step": 8800 }, { "epoch": 2.12, "learning_rate": 1.738725307491614e-05, "loss": 0.0367, "step": 9000 }, { "epoch": 2.12, "eval_bleu": 29.539694856534723, "eval_chrf": 25.49563217007088, "eval_loss": 0.20109611749649048, "eval_runtime": 343.2907, "eval_samples_per_second": 4.661, "eval_steps_per_second": 0.146, "step": 9000 }, { "epoch": 2.17, "learning_rate": 1.645546030562803e-05, "loss": 0.0357, "step": 9200 }, { "epoch": 2.21, "learning_rate": 1.552366753633992e-05, "loss": 0.0348, "step": 9400 }, { "epoch": 2.26, "learning_rate": 1.4591874767051808e-05, "loss": 0.0365, "step": 9600 }, { "epoch": 2.31, "learning_rate": 1.36600819977637e-05, "loss": 0.0356, "step": 9800 }, { "epoch": 2.36, "learning_rate": 1.2728289228475587e-05, "loss": 0.0361, "step": 10000 }, { "epoch": 2.36, "eval_bleu": 29.780602519540743, "eval_chrf": 25.721793630268674, "eval_loss": 0.20224666595458984, "eval_runtime": 349.4231, "eval_samples_per_second": 4.579, "eval_steps_per_second": 0.143, "step": 10000 }, { "epoch": 2.4, "learning_rate": 1.1796496459187478e-05, "loss": 0.0354, "step": 10200 }, { "epoch": 2.45, "learning_rate": 1.0864703689899367e-05, "loss": 0.0349, "step": 10400 }, { "epoch": 2.5, "learning_rate": 9.932910920611255e-06, "loss": 0.0352, "step": 10600 }, { "epoch": 2.54, "learning_rate": 9.001118151323146e-06, "loss": 0.0353, "step": 10800 }, { "epoch": 2.59, "learning_rate": 8.069325382035035e-06, "loss": 0.0357, "step": 11000 }, { "epoch": 2.59, "eval_bleu": 30.453580686642933, "eval_chrf": 26.259452504202525, "eval_loss": 0.19840951263904572, "eval_runtime": 395.5462, "eval_samples_per_second": 4.045, "eval_steps_per_second": 0.126, "step": 11000 }, { "epoch": 2.64, "learning_rate": 7.137532612746925e-06, "loss": 0.0352, "step": 11200 }, { "epoch": 2.69, "learning_rate": 6.205739843458815e-06, "loss": 0.0344, "step": 11400 }, { "epoch": 2.73, "learning_rate": 5.273947074170705e-06, "loss": 0.0335, "step": 11600 }, { "epoch": 2.78, "learning_rate": 4.342154304882595e-06, "loss": 0.0346, "step": 11800 }, { "epoch": 2.83, "learning_rate": 3.410361535594484e-06, "loss": 0.0352, "step": 12000 }, { "epoch": 2.83, "eval_bleu": 30.412569602104455, "eval_chrf": 26.262864851975078, "eval_loss": 0.1988079845905304, "eval_runtime": 424.6307, "eval_samples_per_second": 3.768, "eval_steps_per_second": 0.118, "step": 12000 } ], "max_steps": 12732, "num_train_epochs": 3, "total_flos": 2.926491104968704e+16, "trial_name": null, "trial_params": null }