{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 6.818181818181818e-06, "loss": 5.9674, "step": 5 }, { "epoch": 0.23, "learning_rate": 1.3636363636363637e-05, "loss": 4.9018, "step": 10 }, { "epoch": 0.34, "learning_rate": 2.0454545454545454e-05, "loss": 3.5958, "step": 15 }, { "epoch": 0.45, "learning_rate": 2.7272727272727273e-05, "loss": 3.0987, "step": 20 }, { "epoch": 0.57, "learning_rate": 2.9784688995215314e-05, "loss": 2.9252, "step": 25 }, { "epoch": 0.68, "learning_rate": 2.9425837320574164e-05, "loss": 2.6791, "step": 30 }, { "epoch": 0.8, "learning_rate": 2.9066985645933014e-05, "loss": 2.5349, "step": 35 }, { "epoch": 0.91, "learning_rate": 2.8708133971291868e-05, "loss": 2.5677, "step": 40 }, { "epoch": 1.0, "eval_accuracy": 0.5620647773279352, "eval_loss": 2.2406601905822754, "eval_runtime": 2.8821, "eval_samples_per_second": 34.697, "eval_steps_per_second": 2.429, "step": 44 }, { "epoch": 1.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 44 }, { "epoch": 1.02, "learning_rate": 2.834928229665072e-05, "loss": 2.4426, "step": 45 }, { "epoch": 1.14, "learning_rate": 2.799043062200957e-05, "loss": 2.1847, "step": 50 }, { "epoch": 1.25, "learning_rate": 2.7631578947368423e-05, "loss": 2.0522, "step": 55 }, { "epoch": 1.36, "learning_rate": 2.7272727272727273e-05, "loss": 2.0172, "step": 60 }, { "epoch": 1.48, "learning_rate": 2.6913875598086123e-05, "loss": 1.936, "step": 65 }, { "epoch": 1.59, "learning_rate": 2.6555023923444974e-05, "loss": 1.9568, "step": 70 }, { "epoch": 1.7, "learning_rate": 2.6196172248803828e-05, "loss": 1.841, "step": 75 }, { "epoch": 1.82, "learning_rate": 2.583732057416268e-05, "loss": 1.8245, "step": 80 }, { "epoch": 1.93, "learning_rate": 2.5478468899521532e-05, "loss": 1.6845, "step": 85 }, { "epoch": 2.0, "eval_accuracy": 0.6334412955465587, "eval_loss": 1.5646737813949585, "eval_runtime": 2.9402, "eval_samples_per_second": 34.012, "eval_steps_per_second": 2.381, "step": 88 }, { "epoch": 2.0, "eval_exact_match": 3.0, "eval_f1": 4.5, "eval_qa_bleu": 1.0016612479686555, "eval_qa_exact_match": 0.02, "eval_recite_bleu": 4.0390973472926825, "eval_recite_exact_match": 0.0, "step": 88 }, { "epoch": 2.05, "learning_rate": 2.5119617224880386e-05, "loss": 1.5424, "step": 90 }, { "epoch": 2.16, "learning_rate": 2.4760765550239236e-05, "loss": 1.259, "step": 95 }, { "epoch": 2.27, "learning_rate": 2.4401913875598086e-05, "loss": 1.1652, "step": 100 }, { "epoch": 2.39, "learning_rate": 2.404306220095694e-05, "loss": 1.2034, "step": 105 }, { "epoch": 2.5, "learning_rate": 2.368421052631579e-05, "loss": 1.0906, "step": 110 }, { "epoch": 2.61, "learning_rate": 2.332535885167464e-05, "loss": 1.0916, "step": 115 }, { "epoch": 2.73, "learning_rate": 2.2966507177033495e-05, "loss": 1.013, "step": 120 }, { "epoch": 2.84, "learning_rate": 2.2607655502392345e-05, "loss": 1.0312, "step": 125 }, { "epoch": 2.95, "learning_rate": 2.2248803827751195e-05, "loss": 0.944, "step": 130 }, { "epoch": 3.0, "eval_accuracy": 0.7006072874493927, "eval_loss": 1.0917375087738037, "eval_runtime": 2.9489, "eval_samples_per_second": 33.911, "eval_steps_per_second": 2.374, "step": 132 }, { "epoch": 3.0, "eval_exact_match": 5.0, "eval_f1": 7.0, "eval_qa_bleu": 2.8152067076298715, "eval_qa_exact_match": 0.04, "eval_recite_bleu": 7.904352411801737, "eval_recite_exact_match": 0.0, "step": 132 }, { "epoch": 3.07, "learning_rate": 2.188995215311005e-05, "loss": 0.8591, "step": 135 }, { "epoch": 3.18, "learning_rate": 2.15311004784689e-05, "loss": 0.6686, "step": 140 }, { "epoch": 3.3, "learning_rate": 2.117224880382775e-05, "loss": 0.6312, "step": 145 }, { "epoch": 3.41, "learning_rate": 2.0813397129186604e-05, "loss": 0.643, "step": 150 }, { "epoch": 3.52, "learning_rate": 2.0454545454545454e-05, "loss": 0.6344, "step": 155 }, { "epoch": 3.64, "learning_rate": 2.0095693779904308e-05, "loss": 0.5708, "step": 160 }, { "epoch": 3.75, "learning_rate": 1.9736842105263158e-05, "loss": 0.5898, "step": 165 }, { "epoch": 3.86, "learning_rate": 1.9377990430622012e-05, "loss": 0.5068, "step": 170 }, { "epoch": 3.98, "learning_rate": 1.9019138755980862e-05, "loss": 0.5071, "step": 175 }, { "epoch": 4.0, "eval_accuracy": 0.7508906882591093, "eval_loss": 0.7387517690658569, "eval_runtime": 2.9584, "eval_samples_per_second": 33.802, "eval_steps_per_second": 2.366, "step": 176 }, { "epoch": 4.0, "eval_exact_match": 10.0, "eval_f1": 12.566666666666668, "eval_qa_bleu": 6.688606008500287, "eval_qa_exact_match": 0.09, "eval_recite_bleu": 12.897326731118289, "eval_recite_exact_match": 0.0, "step": 176 }, { "epoch": 4.09, "learning_rate": 1.8660287081339713e-05, "loss": 0.3888, "step": 180 }, { "epoch": 4.2, "learning_rate": 1.8301435406698566e-05, "loss": 0.3604, "step": 185 }, { "epoch": 4.32, "learning_rate": 1.7942583732057417e-05, "loss": 0.3616, "step": 190 }, { "epoch": 4.43, "learning_rate": 1.7583732057416267e-05, "loss": 0.3542, "step": 195 }, { "epoch": 4.55, "learning_rate": 1.722488038277512e-05, "loss": 0.3446, "step": 200 }, { "epoch": 4.66, "learning_rate": 1.686602870813397e-05, "loss": 0.3578, "step": 205 }, { "epoch": 4.77, "learning_rate": 1.650717703349282e-05, "loss": 0.331, "step": 210 }, { "epoch": 4.89, "learning_rate": 1.6148325358851675e-05, "loss": 0.3328, "step": 215 }, { "epoch": 5.0, "learning_rate": 1.5789473684210526e-05, "loss": 0.3104, "step": 220 }, { "epoch": 5.0, "eval_accuracy": 0.7798380566801619, "eval_loss": 0.5627078413963318, "eval_runtime": 2.8769, "eval_samples_per_second": 34.76, "eval_steps_per_second": 2.433, "step": 220 }, { "epoch": 5.0, "eval_exact_match": 10.0, "eval_f1": 12.852380952380951, "eval_qa_bleu": 6.174893155391455, "eval_qa_exact_match": 0.08, "eval_recite_bleu": 14.799858879722134, "eval_recite_exact_match": 0.02, "step": 220 }, { "epoch": 5.11, "learning_rate": 1.5430622009569376e-05, "loss": 0.242, "step": 225 }, { "epoch": 5.23, "learning_rate": 1.5071770334928228e-05, "loss": 0.2484, "step": 230 }, { "epoch": 5.34, "learning_rate": 1.4712918660287082e-05, "loss": 0.2336, "step": 235 }, { "epoch": 5.45, "learning_rate": 1.4354066985645934e-05, "loss": 0.2304, "step": 240 }, { "epoch": 5.57, "learning_rate": 1.3995215311004784e-05, "loss": 0.2039, "step": 245 }, { "epoch": 5.68, "learning_rate": 1.3636363636363637e-05, "loss": 0.2096, "step": 250 }, { "epoch": 5.8, "learning_rate": 1.3277511961722487e-05, "loss": 0.2107, "step": 255 }, { "epoch": 5.91, "learning_rate": 1.291866028708134e-05, "loss": 0.2331, "step": 260 }, { "epoch": 6.0, "eval_accuracy": 0.794493927125506, "eval_loss": 0.5108077526092529, "eval_runtime": 2.9331, "eval_samples_per_second": 34.094, "eval_steps_per_second": 2.387, "step": 264 }, { "epoch": 6.0, "eval_exact_match": 8.0, "eval_f1": 11.819047619047621, "eval_qa_bleu": 3.035429286549727, "eval_qa_exact_match": 0.06, "eval_recite_bleu": 18.406851919697136, "eval_recite_exact_match": 0.04, "step": 264 }, { "epoch": 6.02, "learning_rate": 1.2559808612440193e-05, "loss": 0.2358, "step": 265 }, { "epoch": 6.14, "learning_rate": 1.2200956937799043e-05, "loss": 0.182, "step": 270 }, { "epoch": 6.25, "learning_rate": 1.1842105263157895e-05, "loss": 0.1702, "step": 275 }, { "epoch": 6.36, "learning_rate": 1.1483253588516747e-05, "loss": 0.1627, "step": 280 }, { "epoch": 6.48, "learning_rate": 1.1124401913875598e-05, "loss": 0.164, "step": 285 }, { "epoch": 6.59, "learning_rate": 1.076555023923445e-05, "loss": 0.158, "step": 290 }, { "epoch": 6.7, "learning_rate": 1.0406698564593302e-05, "loss": 0.1528, "step": 295 }, { "epoch": 6.82, "learning_rate": 1.0047846889952154e-05, "loss": 0.1621, "step": 300 }, { "epoch": 6.93, "learning_rate": 9.688995215311006e-06, "loss": 0.1608, "step": 305 }, { "epoch": 7.0, "eval_accuracy": 0.8043319838056681, "eval_loss": 0.4221380352973938, "eval_runtime": 2.9375, "eval_samples_per_second": 34.042, "eval_steps_per_second": 2.383, "step": 308 }, { "epoch": 7.0, "eval_exact_match": 15.0, "eval_f1": 20.2, "eval_qa_bleu": 11.25183347409325, "eval_qa_exact_match": 0.1, "eval_recite_bleu": 24.478610800554645, "eval_recite_exact_match": 0.06, "step": 308 }, { "epoch": 7.05, "learning_rate": 9.330143540669856e-06, "loss": 0.1434, "step": 310 }, { "epoch": 7.16, "learning_rate": 8.971291866028708e-06, "loss": 0.1321, "step": 315 }, { "epoch": 7.27, "learning_rate": 8.61244019138756e-06, "loss": 0.1251, "step": 320 }, { "epoch": 7.39, "learning_rate": 8.25358851674641e-06, "loss": 0.1219, "step": 325 }, { "epoch": 7.5, "learning_rate": 7.894736842105263e-06, "loss": 0.124, "step": 330 }, { "epoch": 7.61, "learning_rate": 7.535885167464114e-06, "loss": 0.1294, "step": 335 }, { "epoch": 7.73, "learning_rate": 7.177033492822967e-06, "loss": 0.1255, "step": 340 }, { "epoch": 7.84, "learning_rate": 6.818181818181818e-06, "loss": 0.1275, "step": 345 }, { "epoch": 7.95, "learning_rate": 6.45933014354067e-06, "loss": 0.1303, "step": 350 }, { "epoch": 8.0, "eval_accuracy": 0.8062348178137652, "eval_loss": 0.40748822689056396, "eval_runtime": 2.9587, "eval_samples_per_second": 33.799, "eval_steps_per_second": 2.366, "step": 352 }, { "epoch": 8.0, "eval_exact_match": 12.0, "eval_f1": 15.333333333333336, "eval_qa_bleu": 8.078342088900973, "eval_qa_exact_match": 0.09, "eval_recite_bleu": 22.43610014824646, "eval_recite_exact_match": 0.08, "step": 352 }, { "epoch": 8.07, "learning_rate": 6.1004784688995216e-06, "loss": 0.1123, "step": 355 }, { "epoch": 8.18, "learning_rate": 5.741626794258374e-06, "loss": 0.1099, "step": 360 }, { "epoch": 8.3, "learning_rate": 5.382775119617225e-06, "loss": 0.1063, "step": 365 }, { "epoch": 8.41, "learning_rate": 5.023923444976077e-06, "loss": 0.1131, "step": 370 }, { "epoch": 8.52, "learning_rate": 4.665071770334928e-06, "loss": 0.1034, "step": 375 }, { "epoch": 8.64, "learning_rate": 4.30622009569378e-06, "loss": 0.1057, "step": 380 }, { "epoch": 8.75, "learning_rate": 3.9473684210526315e-06, "loss": 0.1089, "step": 385 }, { "epoch": 8.86, "learning_rate": 3.5885167464114835e-06, "loss": 0.1059, "step": 390 }, { "epoch": 8.98, "learning_rate": 3.229665071770335e-06, "loss": 0.1159, "step": 395 }, { "epoch": 9.0, "eval_accuracy": 0.8068016194331984, "eval_loss": 0.40290313959121704, "eval_runtime": 3.0705, "eval_samples_per_second": 32.568, "eval_steps_per_second": 2.28, "step": 396 }, { "epoch": 9.0, "eval_exact_match": 12.0, "eval_f1": 15.419047619047621, "eval_qa_bleu": 7.93628181299011, "eval_qa_exact_match": 0.08, "eval_recite_bleu": 23.416353562844378, "eval_recite_exact_match": 0.05, "step": 396 }, { "epoch": 9.09, "learning_rate": 2.870813397129187e-06, "loss": 0.1092, "step": 400 }, { "epoch": 9.2, "learning_rate": 2.5119617224880385e-06, "loss": 0.099, "step": 405 }, { "epoch": 9.32, "learning_rate": 2.15311004784689e-06, "loss": 0.1011, "step": 410 }, { "epoch": 9.43, "learning_rate": 1.7942583732057418e-06, "loss": 0.0952, "step": 415 }, { "epoch": 9.55, "learning_rate": 1.4354066985645934e-06, "loss": 0.0974, "step": 420 }, { "epoch": 9.66, "learning_rate": 1.076555023923445e-06, "loss": 0.1015, "step": 425 }, { "epoch": 9.77, "learning_rate": 7.177033492822967e-07, "loss": 0.1049, "step": 430 }, { "epoch": 9.89, "learning_rate": 3.5885167464114835e-07, "loss": 0.0971, "step": 435 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.1004, "step": 440 }, { "epoch": 10.0, "eval_accuracy": 0.8079757085020243, "eval_loss": 0.40095633268356323, "eval_runtime": 2.9782, "eval_samples_per_second": 33.578, "eval_steps_per_second": 2.35, "step": 440 }, { "epoch": 10.0, "eval_exact_match": 12.0, "eval_f1": 14.357142857142858, "eval_qa_bleu": 9.762914151034567, "eval_qa_exact_match": 0.08, "eval_recite_bleu": 22.056151149667812, "eval_recite_exact_match": 0.05, "step": 440 }, { "epoch": 10.0, "step": 440, "total_flos": 1.11688444717056e+16, "train_loss": 0.07280441840941256, "train_runtime": 576.9944, "train_samples_per_second": 11.993, "train_steps_per_second": 0.763 } ], "logging_steps": 5, "max_steps": 440, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.11688444717056e+16, "trial_name": null, "trial_params": null }