{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 66,
  "global_step": 1310,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 3e-05,
      "loss": 3.7331,
      "step": 14
    },
    {
      "epoch": 0.21,
      "learning_rate": 3e-05,
      "loss": 2.345,
      "step": 28
    },
    {
      "epoch": 0.32,
      "learning_rate": 3e-05,
      "loss": 2.1491,
      "step": 42
    },
    {
      "epoch": 0.43,
      "learning_rate": 3e-05,
      "loss": 2.0739,
      "step": 56
    },
    {
      "epoch": 0.5,
      "eval_accuracy": 0.6206453178068898,
      "eval_loss": 1.8404711484909058,
      "eval_runtime": 11.9883,
      "eval_samples_per_second": 25.024,
      "eval_steps_per_second": 1.585,
      "step": 66
    },
    {
      "epoch": 0.5,
      "eval_exact_match": 7.666666666666667,
      "eval_f1": 10.221428571428572,
      "eval_qa_bleu": 1.4042262195131967,
      "eval_qa_exact_match": 0.07333333333333333,
      "eval_recite_bleu": 8.5956480576491,
      "eval_recite_exact_match": 0.0,
      "step": 66
    },
    {
      "epoch": 0.53,
      "learning_rate": 3e-05,
      "loss": 1.9722,
      "step": 70
    },
    {
      "epoch": 0.64,
      "learning_rate": 3e-05,
      "loss": 2.014,
      "step": 84
    },
    {
      "epoch": 0.75,
      "learning_rate": 3e-05,
      "loss": 1.9812,
      "step": 98
    },
    {
      "epoch": 0.85,
      "learning_rate": 3e-05,
      "loss": 1.9213,
      "step": 112
    },
    {
      "epoch": 0.96,
      "learning_rate": 3e-05,
      "loss": 1.8806,
      "step": 126
    },
    {
      "epoch": 1.01,
      "eval_accuracy": 0.6365065502183406,
      "eval_loss": 1.574863314628601,
      "eval_runtime": 9.1867,
      "eval_samples_per_second": 32.656,
      "eval_steps_per_second": 2.068,
      "step": 132
    },
    {
      "epoch": 1.01,
      "eval_exact_match": 8.666666666666666,
      "eval_f1": 13.8123783922171,
      "eval_qa_bleu": 3.0725240037081307,
      "eval_qa_exact_match": 0.08666666666666667,
      "eval_recite_bleu": 16.386073781113847,
      "eval_recite_exact_match": 0.0,
      "step": 132
    },
    {
      "epoch": 1.07,
      "learning_rate": 3e-05,
      "loss": 1.6177,
      "step": 140
    },
    {
      "epoch": 1.18,
      "learning_rate": 3e-05,
      "loss": 1.4286,
      "step": 154
    },
    {
      "epoch": 1.28,
      "learning_rate": 3e-05,
      "loss": 1.4102,
      "step": 168
    },
    {
      "epoch": 1.39,
      "learning_rate": 3e-05,
      "loss": 1.4033,
      "step": 182
    },
    {
      "epoch": 1.5,
      "learning_rate": 3e-05,
      "loss": 1.3619,
      "step": 196
    },
    {
      "epoch": 1.51,
      "eval_accuracy": 0.6533139252789908,
      "eval_loss": 1.342494010925293,
      "eval_runtime": 9.3165,
      "eval_samples_per_second": 32.201,
      "eval_steps_per_second": 2.039,
      "step": 198
    },
    {
      "epoch": 1.51,
      "eval_exact_match": 7.0,
      "eval_f1": 10.38888888888889,
      "eval_qa_bleu": 5.590724094958645,
      "eval_qa_exact_match": 0.06666666666666667,
      "eval_recite_bleu": 17.070052559724132,
      "eval_recite_exact_match": 0.0,
      "step": 198
    },
    {
      "epoch": 1.6,
      "learning_rate": 3e-05,
      "loss": 1.3649,
      "step": 210
    },
    {
      "epoch": 1.71,
      "learning_rate": 3e-05,
      "loss": 1.2886,
      "step": 224
    },
    {
      "epoch": 1.82,
      "learning_rate": 3e-05,
      "loss": 1.2787,
      "step": 238
    },
    {
      "epoch": 1.92,
      "learning_rate": 3e-05,
      "loss": 1.283,
      "step": 252
    },
    {
      "epoch": 2.02,
      "eval_accuracy": 0.6685152838427948,
      "eval_loss": 1.1253471374511719,
      "eval_runtime": 9.3443,
      "eval_samples_per_second": 32.105,
      "eval_steps_per_second": 2.033,
      "step": 264
    },
    {
      "epoch": 2.02,
      "eval_exact_match": 9.0,
      "eval_f1": 13.042195767195768,
      "eval_qa_bleu": 5.743589453455261,
      "eval_qa_exact_match": 0.08666666666666667,
      "eval_recite_bleu": 15.4059437725803,
      "eval_recite_exact_match": 0.0,
      "step": 264
    },
    {
      "epoch": 2.03,
      "learning_rate": 3e-05,
      "loss": 1.112,
      "step": 266
    },
    {
      "epoch": 2.14,
      "learning_rate": 3e-05,
      "loss": 0.8377,
      "step": 280
    },
    {
      "epoch": 2.24,
      "learning_rate": 3e-05,
      "loss": 0.8533,
      "step": 294
    },
    {
      "epoch": 2.35,
      "learning_rate": 3e-05,
      "loss": 0.8407,
      "step": 308
    },
    {
      "epoch": 2.46,
      "learning_rate": 3e-05,
      "loss": 0.8433,
      "step": 322
    },
    {
      "epoch": 2.52,
      "eval_accuracy": 0.6825278990781174,
      "eval_loss": 0.9735248684883118,
      "eval_runtime": 9.3401,
      "eval_samples_per_second": 32.12,
      "eval_steps_per_second": 2.034,
      "step": 330
    },
    {
      "epoch": 2.52,
      "eval_exact_match": 11.666666666666666,
      "eval_f1": 15.95859788359788,
      "eval_qa_bleu": 6.31601664322449,
      "eval_qa_exact_match": 0.11,
      "eval_recite_bleu": 21.285839887136575,
      "eval_recite_exact_match": 0.0,
      "step": 330
    },
    {
      "epoch": 2.56,
      "learning_rate": 3e-05,
      "loss": 0.8632,
      "step": 336
    },
    {
      "epoch": 2.67,
      "learning_rate": 3e-05,
      "loss": 0.8308,
      "step": 350
    },
    {
      "epoch": 2.78,
      "learning_rate": 3e-05,
      "loss": 0.8475,
      "step": 364
    },
    {
      "epoch": 2.89,
      "learning_rate": 3e-05,
      "loss": 0.7829,
      "step": 378
    },
    {
      "epoch": 2.99,
      "learning_rate": 3e-05,
      "loss": 0.7629,
      "step": 392
    },
    {
      "epoch": 3.02,
      "eval_accuracy": 0.6982241630276564,
      "eval_loss": 0.7873561382293701,
      "eval_runtime": 9.0856,
      "eval_samples_per_second": 33.019,
      "eval_steps_per_second": 2.091,
      "step": 396
    },
    {
      "epoch": 3.02,
      "eval_exact_match": 12.333333333333334,
      "eval_f1": 16.93478835978836,
      "eval_qa_bleu": 7.7589360856745815,
      "eval_qa_exact_match": 0.12,
      "eval_recite_bleu": 22.16978951972916,
      "eval_recite_exact_match": 0.0,
      "step": 396
    },
    {
      "epoch": 3.1,
      "learning_rate": 3e-05,
      "loss": 0.5537,
      "step": 406
    },
    {
      "epoch": 3.21,
      "learning_rate": 3e-05,
      "loss": 0.5111,
      "step": 420
    },
    {
      "epoch": 3.31,
      "learning_rate": 3e-05,
      "loss": 0.479,
      "step": 434
    },
    {
      "epoch": 3.42,
      "learning_rate": 3e-05,
      "loss": 0.4904,
      "step": 448
    },
    {
      "epoch": 3.53,
      "learning_rate": 3e-05,
      "loss": 0.5058,
      "step": 462
    },
    {
      "epoch": 3.53,
      "eval_accuracy": 0.7086026200873362,
      "eval_loss": 0.6920613646507263,
      "eval_runtime": 9.3473,
      "eval_samples_per_second": 32.095,
      "eval_steps_per_second": 2.033,
      "step": 462
    },
    {
      "epoch": 3.53,
      "eval_exact_match": 15.333333333333334,
      "eval_f1": 20.8812384782973,
      "eval_qa_bleu": 11.001436952878551,
      "eval_qa_exact_match": 0.14,
      "eval_recite_bleu": 25.319328021621963,
      "eval_recite_exact_match": 0.0,
      "step": 462
    },
    {
      "epoch": 3.63,
      "learning_rate": 3e-05,
      "loss": 0.5456,
      "step": 476
    },
    {
      "epoch": 3.74,
      "learning_rate": 3e-05,
      "loss": 0.4998,
      "step": 490
    },
    {
      "epoch": 3.85,
      "learning_rate": 3e-05,
      "loss": 0.5075,
      "step": 504
    },
    {
      "epoch": 3.95,
      "learning_rate": 3e-05,
      "loss": 0.4593,
      "step": 518
    },
    {
      "epoch": 4.03,
      "eval_accuracy": 0.7196943231441048,
      "eval_loss": 0.564062237739563,
      "eval_runtime": 9.2784,
      "eval_samples_per_second": 32.333,
      "eval_steps_per_second": 2.048,
      "step": 528
    },
    {
      "epoch": 4.03,
      "eval_exact_match": 13.333333333333334,
      "eval_f1": 18.11966301672184,
      "eval_qa_bleu": 4.47266646700781,
      "eval_qa_exact_match": 0.12666666666666668,
      "eval_recite_bleu": 26.136161725029392,
      "eval_recite_exact_match": 0.0033333333333333335,
      "step": 528
    },
    {
      "epoch": 4.06,
      "learning_rate": 3e-05,
      "loss": 0.3952,
      "step": 532
    },
    {
      "epoch": 4.17,
      "learning_rate": 3e-05,
      "loss": 0.2996,
      "step": 546
    },
    {
      "epoch": 4.27,
      "learning_rate": 3e-05,
      "loss": 0.3354,
      "step": 560
    },
    {
      "epoch": 4.38,
      "learning_rate": 3e-05,
      "loss": 0.3179,
      "step": 574
    },
    {
      "epoch": 4.49,
      "learning_rate": 3e-05,
      "loss": 0.3064,
      "step": 588
    },
    {
      "epoch": 4.53,
      "eval_accuracy": 0.7245269286754003,
      "eval_loss": 0.5348330736160278,
      "eval_runtime": 9.5519,
      "eval_samples_per_second": 31.407,
      "eval_steps_per_second": 1.989,
      "step": 594
    },
    {
      "epoch": 4.53,
      "eval_exact_match": 13.666666666666666,
      "eval_f1": 19.689682539682536,
      "eval_qa_bleu": 4.45260487005976,
      "eval_qa_exact_match": 0.12666666666666668,
      "eval_recite_bleu": 28.855187002245795,
      "eval_recite_exact_match": 0.0,
      "step": 594
    },
    {
      "epoch": 4.6,
      "learning_rate": 3e-05,
      "loss": 0.3258,
      "step": 602
    },
    {
      "epoch": 4.7,
      "learning_rate": 3e-05,
      "loss": 0.3384,
      "step": 616
    },
    {
      "epoch": 4.81,
      "learning_rate": 3e-05,
      "loss": 0.3258,
      "step": 630
    },
    {
      "epoch": 4.92,
      "learning_rate": 3e-05,
      "loss": 0.312,
      "step": 644
    },
    {
      "epoch": 5.02,
      "learning_rate": 3e-05,
      "loss": 0.2967,
      "step": 658
    },
    {
      "epoch": 5.04,
      "eval_accuracy": 0.7303784570596797,
      "eval_loss": 0.47703343629837036,
      "eval_runtime": 9.4774,
      "eval_samples_per_second": 31.654,
      "eval_steps_per_second": 2.005,
      "step": 660
    },
    {
      "epoch": 5.04,
      "eval_exact_match": 12.333333333333334,
      "eval_f1": 18.61798941798941,
      "eval_qa_bleu": 6.60839076276961,
      "eval_qa_exact_match": 0.12,
      "eval_recite_bleu": 29.5293953590396,
      "eval_recite_exact_match": 0.0,
      "step": 660
    },
    {
      "epoch": 5.13,
      "learning_rate": 3e-05,
      "loss": 0.2148,
      "step": 672
    },
    {
      "epoch": 5.24,
      "learning_rate": 3e-05,
      "loss": 0.2275,
      "step": 686
    },
    {
      "epoch": 5.34,
      "learning_rate": 3e-05,
      "loss": 0.2158,
      "step": 700
    },
    {
      "epoch": 5.45,
      "learning_rate": 3e-05,
      "loss": 0.2167,
      "step": 714
    },
    {
      "epoch": 5.54,
      "eval_accuracy": 0.732372634643377,
      "eval_loss": 0.458192378282547,
      "eval_runtime": 9.1255,
      "eval_samples_per_second": 32.875,
      "eval_steps_per_second": 2.082,
      "step": 726
    },
    {
      "epoch": 5.54,
      "eval_exact_match": 14.333333333333334,
      "eval_f1": 19.81243386243386,
      "eval_qa_bleu": 8.961623752889384,
      "eval_qa_exact_match": 0.13666666666666666,
      "eval_recite_bleu": 33.41904095334099,
      "eval_recite_exact_match": 0.0,
      "step": 726
    },
    {
      "epoch": 5.56,
      "learning_rate": 3e-05,
      "loss": 0.229,
      "step": 728
    },
    {
      "epoch": 5.66,
      "learning_rate": 3e-05,
      "loss": 0.2275,
      "step": 742
    },
    {
      "epoch": 5.77,
      "learning_rate": 3e-05,
      "loss": 0.2211,
      "step": 756
    },
    {
      "epoch": 5.88,
      "learning_rate": 3e-05,
      "loss": 0.2231,
      "step": 770
    },
    {
      "epoch": 5.98,
      "learning_rate": 3e-05,
      "loss": 0.2157,
      "step": 784
    },
    {
      "epoch": 6.05,
      "eval_accuracy": 0.7358369723435225,
      "eval_loss": 0.4307834804058075,
      "eval_runtime": 9.1847,
      "eval_samples_per_second": 32.663,
      "eval_steps_per_second": 2.069,
      "step": 792
    },
    {
      "epoch": 6.05,
      "eval_exact_match": 16.666666666666668,
      "eval_f1": 21.786772486772488,
      "eval_qa_bleu": 7.511563755726586,
      "eval_qa_exact_match": 0.16,
      "eval_recite_bleu": 37.63384023220464,
      "eval_recite_exact_match": 0.013333333333333334,
      "step": 792
    },
    {
      "epoch": 6.09,
      "learning_rate": 3e-05,
      "loss": 0.1669,
      "step": 798
    },
    {
      "epoch": 6.2,
      "learning_rate": 3e-05,
      "loss": 0.1712,
      "step": 812
    },
    {
      "epoch": 6.31,
      "learning_rate": 3e-05,
      "loss": 0.1601,
      "step": 826
    },
    {
      "epoch": 6.41,
      "learning_rate": 3e-05,
      "loss": 0.1608,
      "step": 840
    },
    {
      "epoch": 6.52,
      "learning_rate": 3e-05,
      "loss": 0.1597,
      "step": 854
    },
    {
      "epoch": 6.55,
      "eval_accuracy": 0.7373410965550704,
      "eval_loss": 0.4301389157772064,
      "eval_runtime": 9.7372,
      "eval_samples_per_second": 30.81,
      "eval_steps_per_second": 1.951,
      "step": 858
    },
    {
      "epoch": 6.55,
      "eval_exact_match": 15.666666666666666,
      "eval_f1": 21.056661856661847,
      "eval_qa_bleu": 12.649140852831426,
      "eval_qa_exact_match": 0.14666666666666667,
      "eval_recite_bleu": 42.22665248887737,
      "eval_recite_exact_match": 0.013333333333333334,
      "step": 858
    },
    {
      "epoch": 6.63,
      "learning_rate": 3e-05,
      "loss": 0.1623,
      "step": 868
    },
    {
      "epoch": 6.73,
      "learning_rate": 3e-05,
      "loss": 0.1668,
      "step": 882
    },
    {
      "epoch": 6.84,
      "learning_rate": 3e-05,
      "loss": 0.1624,
      "step": 896
    },
    {
      "epoch": 6.95,
      "learning_rate": 3e-05,
      "loss": 0.1648,
      "step": 910
    },
    {
      "epoch": 7.05,
      "learning_rate": 3e-05,
      "loss": 0.1481,
      "step": 924
    },
    {
      "epoch": 7.05,
      "eval_accuracy": 0.7385298398835517,
      "eval_loss": 0.42236796021461487,
      "eval_runtime": 9.3603,
      "eval_samples_per_second": 32.05,
      "eval_steps_per_second": 2.03,
      "step": 924
    },
    {
      "epoch": 7.05,
      "eval_exact_match": 18.666666666666668,
      "eval_f1": 25.187830687830694,
      "eval_qa_bleu": 8.014835952265651,
      "eval_qa_exact_match": 0.18,
      "eval_recite_bleu": 42.38987173856079,
      "eval_recite_exact_match": 0.016666666666666666,
      "step": 924
    },
    {
      "epoch": 7.16,
      "learning_rate": 3e-05,
      "loss": 0.1227,
      "step": 938
    },
    {
      "epoch": 7.27,
      "learning_rate": 3e-05,
      "loss": 0.1272,
      "step": 952
    },
    {
      "epoch": 7.37,
      "learning_rate": 3e-05,
      "loss": 0.1312,
      "step": 966
    },
    {
      "epoch": 7.48,
      "learning_rate": 3e-05,
      "loss": 0.1293,
      "step": 980
    },
    {
      "epoch": 7.56,
      "eval_accuracy": 0.739422610383309,
      "eval_loss": 0.41248488426208496,
      "eval_runtime": 9.7486,
      "eval_samples_per_second": 30.774,
      "eval_steps_per_second": 1.949,
      "step": 990
    },
    {
      "epoch": 7.56,
      "eval_exact_match": 15.666666666666666,
      "eval_f1": 22.348196248196246,
      "eval_qa_bleu": 6.260683683577316,
      "eval_qa_exact_match": 0.14666666666666667,
      "eval_recite_bleu": 45.04221504063147,
      "eval_recite_exact_match": 0.023333333333333334,
      "step": 990
    },
    {
      "epoch": 7.59,
      "learning_rate": 3e-05,
      "loss": 0.1345,
      "step": 994
    },
    {
      "epoch": 7.69,
      "learning_rate": 3e-05,
      "loss": 0.1325,
      "step": 1008
    },
    {
      "epoch": 7.8,
      "learning_rate": 3e-05,
      "loss": 0.1273,
      "step": 1022
    },
    {
      "epoch": 7.91,
      "learning_rate": 3e-05,
      "loss": 0.1362,
      "step": 1036
    },
    {
      "epoch": 8.02,
      "learning_rate": 3e-05,
      "loss": 0.125,
      "step": 1050
    },
    {
      "epoch": 8.06,
      "eval_accuracy": 0.7399902959728287,
      "eval_loss": 0.41223272681236267,
      "eval_runtime": 9.2562,
      "eval_samples_per_second": 32.411,
      "eval_steps_per_second": 2.053,
      "step": 1056
    },
    {
      "epoch": 8.06,
      "eval_exact_match": 18.0,
      "eval_f1": 25.051058201058197,
      "eval_qa_bleu": 10.352483602423003,
      "eval_qa_exact_match": 0.17,
      "eval_recite_bleu": 46.26228887496748,
      "eval_recite_exact_match": 0.04666666666666667,
      "step": 1056
    },
    {
      "epoch": 8.12,
      "learning_rate": 3e-05,
      "loss": 0.11,
      "step": 1064
    },
    {
      "epoch": 8.23,
      "learning_rate": 3e-05,
      "loss": 0.1045,
      "step": 1078
    },
    {
      "epoch": 8.34,
      "learning_rate": 3e-05,
      "loss": 0.1179,
      "step": 1092
    },
    {
      "epoch": 8.44,
      "learning_rate": 3e-05,
      "loss": 0.1083,
      "step": 1106
    },
    {
      "epoch": 8.55,
      "learning_rate": 3e-05,
      "loss": 0.1139,
      "step": 1120
    },
    {
      "epoch": 8.56,
      "eval_accuracy": 0.7406501698204755,
      "eval_loss": 0.40691348910331726,
      "eval_runtime": 9.0014,
      "eval_samples_per_second": 33.328,
      "eval_steps_per_second": 2.111,
      "step": 1122
    },
    {
      "epoch": 8.56,
      "eval_exact_match": 16.333333333333332,
      "eval_f1": 23.766955266955264,
      "eval_qa_bleu": 11.966740392922118,
      "eval_qa_exact_match": 0.15333333333333332,
      "eval_recite_bleu": 45.74271226096357,
      "eval_recite_exact_match": 0.03333333333333333,
      "step": 1122
    },
    {
      "epoch": 8.66,
      "learning_rate": 3e-05,
      "loss": 0.1108,
      "step": 1134
    },
    {
      "epoch": 8.76,
      "learning_rate": 3e-05,
      "loss": 0.1168,
      "step": 1148
    },
    {
      "epoch": 8.87,
      "learning_rate": 3e-05,
      "loss": 0.1132,
      "step": 1162
    },
    {
      "epoch": 8.98,
      "learning_rate": 3e-05,
      "loss": 0.1141,
      "step": 1176
    },
    {
      "epoch": 9.07,
      "eval_accuracy": 0.7409364386220282,
      "eval_loss": 0.40822312235832214,
      "eval_runtime": 9.119,
      "eval_samples_per_second": 32.898,
      "eval_steps_per_second": 2.084,
      "step": 1188
    },
    {
      "epoch": 9.07,
      "eval_exact_match": 17.333333333333332,
      "eval_f1": 23.91428571428571,
      "eval_qa_bleu": 11.956602534707741,
      "eval_qa_exact_match": 0.16666666666666666,
      "eval_recite_bleu": 49.23892010841006,
      "eval_recite_exact_match": 0.056666666666666664,
      "step": 1188
    },
    {
      "epoch": 9.08,
      "learning_rate": 3e-05,
      "loss": 0.0925,
      "step": 1190
    },
    {
      "epoch": 9.19,
      "learning_rate": 3e-05,
      "loss": 0.0918,
      "step": 1204
    },
    {
      "epoch": 9.3,
      "learning_rate": 3e-05,
      "loss": 0.0975,
      "step": 1218
    },
    {
      "epoch": 9.4,
      "learning_rate": 3e-05,
      "loss": 0.0998,
      "step": 1232
    },
    {
      "epoch": 9.51,
      "learning_rate": 3e-05,
      "loss": 0.0994,
      "step": 1246
    },
    {
      "epoch": 9.57,
      "eval_accuracy": 0.7412130033964095,
      "eval_loss": 0.40646126866340637,
      "eval_runtime": 9.2467,
      "eval_samples_per_second": 32.444,
      "eval_steps_per_second": 2.055,
      "step": 1254
    },
    {
      "epoch": 9.57,
      "eval_exact_match": 18.0,
      "eval_f1": 25.15901875901876,
      "eval_qa_bleu": 9.885421847181547,
      "eval_qa_exact_match": 0.16666666666666666,
      "eval_recite_bleu": 48.32004544781462,
      "eval_recite_exact_match": 0.06,
      "step": 1254
    },
    {
      "epoch": 9.62,
      "learning_rate": 3e-05,
      "loss": 0.0972,
      "step": 1260
    },
    {
      "epoch": 9.73,
      "learning_rate": 3e-05,
      "loss": 0.1074,
      "step": 1274
    },
    {
      "epoch": 9.83,
      "learning_rate": 3e-05,
      "loss": 0.0998,
      "step": 1288
    },
    {
      "epoch": 9.94,
      "learning_rate": 3e-05,
      "loss": 0.1094,
      "step": 1302
    },
    {
      "epoch": 10.0,
      "step": 1310,
      "total_flos": 5.95362899625984e+16,
      "train_loss": 0.5945830505312855,
      "train_runtime": 4268.7073,
      "train_samples_per_second": 4.905,
      "train_steps_per_second": 0.307
    }
  ],
  "logging_steps": 14,
  "max_steps": 1310,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 5.95362899625984e+16,
  "trial_name": null,
  "trial_params": null
}