|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 6.226415094339623e-06, |
|
"loss": 1.1807, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.2452830188679246e-05, |
|
"loss": 1.1325, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8679245283018867e-05, |
|
"loss": 0.9876, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.4905660377358492e-05, |
|
"loss": 0.873, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.994011976047904e-05, |
|
"loss": 0.7931, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.9610778443113774e-05, |
|
"loss": 0.7764, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.9281437125748504e-05, |
|
"loss": 0.7701, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.8952095808383233e-05, |
|
"loss": 0.739, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8622754491017966e-05, |
|
"loss": 0.7644, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6429210134128167, |
|
"eval_loss": 1.0036486387252808, |
|
"eval_runtime": 6.1897, |
|
"eval_samples_per_second": 32.312, |
|
"eval_steps_per_second": 2.1, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 14.5, |
|
"eval_f1": 19.727380952380955, |
|
"eval_qa_bleu": 9.309381343075087, |
|
"eval_qa_exact_match": 0.14, |
|
"eval_recite_bleu": 15.818078125717689, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.8293413173652696e-05, |
|
"loss": 0.6488, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.7964071856287425e-05, |
|
"loss": 0.5286, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.763473053892216e-05, |
|
"loss": 0.5157, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7305389221556884e-05, |
|
"loss": 0.4838, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6976047904191617e-05, |
|
"loss": 0.5013, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.664670658682635e-05, |
|
"loss": 0.4649, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.6317365269461076e-05, |
|
"loss": 0.4604, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.598802395209581e-05, |
|
"loss": 0.5064, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.565868263473054e-05, |
|
"loss": 0.4732, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.5329341317365268e-05, |
|
"loss": 0.526, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6657600596125186, |
|
"eval_loss": 0.7350580096244812, |
|
"eval_runtime": 6.5425, |
|
"eval_samples_per_second": 30.569, |
|
"eval_steps_per_second": 1.987, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 12.5, |
|
"eval_f1": 19.69947691197691, |
|
"eval_qa_bleu": 9.452490656702446, |
|
"eval_qa_exact_match": 0.12, |
|
"eval_recite_bleu": 17.493375608153375, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4741, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.467065868263473e-05, |
|
"loss": 0.4338, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.4341317365269464e-05, |
|
"loss": 0.4451, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.4011976047904193e-05, |
|
"loss": 0.4312, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.3682634730538923e-05, |
|
"loss": 0.4363, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.3353293413173656e-05, |
|
"loss": 0.4302, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.302395209580838e-05, |
|
"loss": 0.4528, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.2694610778443115e-05, |
|
"loss": 0.4086, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.2365269461077847e-05, |
|
"loss": 0.4163, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6815499254843517, |
|
"eval_loss": 0.5743635296821594, |
|
"eval_runtime": 6.2699, |
|
"eval_samples_per_second": 31.898, |
|
"eval_steps_per_second": 2.073, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 13.0, |
|
"eval_f1": 20.81829004329004, |
|
"eval_qa_bleu": 7.245823931129993, |
|
"eval_qa_exact_match": 0.12, |
|
"eval_recite_bleu": 22.248138618982026, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.2035928143712574e-05, |
|
"loss": 0.3866, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.1706586826347306e-05, |
|
"loss": 0.2789, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.1377245508982036e-05, |
|
"loss": 0.2932, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.1047904191616766e-05, |
|
"loss": 0.2883, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.07185628742515e-05, |
|
"loss": 0.3048, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.0389221556886228e-05, |
|
"loss": 0.28, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.0059880239520957e-05, |
|
"loss": 0.2847, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.9730538922155687e-05, |
|
"loss": 0.2949, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.940119760479042e-05, |
|
"loss": 0.3002, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.9071856287425153e-05, |
|
"loss": 0.2864, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.689903129657228, |
|
"eval_loss": 0.4952711760997772, |
|
"eval_runtime": 6.337, |
|
"eval_samples_per_second": 31.561, |
|
"eval_steps_per_second": 2.051, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 17.0, |
|
"eval_f1": 22.154761904761905, |
|
"eval_qa_bleu": 10.09488722739415, |
|
"eval_qa_exact_match": 0.155, |
|
"eval_recite_bleu": 27.91257178143564, |
|
"eval_recite_exact_match": 0.005, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.874251497005988e-05, |
|
"loss": 0.2368, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.8413173652694612e-05, |
|
"loss": 0.203, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.8083832335329345e-05, |
|
"loss": 0.2092, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.775449101796407e-05, |
|
"loss": 0.2188, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.7425149700598804e-05, |
|
"loss": 0.2092, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.7095808383233533e-05, |
|
"loss": 0.2144, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 1.6766467065868263e-05, |
|
"loss": 0.2061, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.6437125748502996e-05, |
|
"loss": 0.2161, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.6107784431137725e-05, |
|
"loss": 0.2118, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6943889716840537, |
|
"eval_loss": 0.45938587188720703, |
|
"eval_runtime": 6.2541, |
|
"eval_samples_per_second": 31.979, |
|
"eval_steps_per_second": 2.079, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 16.0, |
|
"eval_f1": 22.22857142857142, |
|
"eval_qa_bleu": 10.69246525438524, |
|
"eval_qa_exact_match": 0.14, |
|
"eval_recite_bleu": 33.64683237936858, |
|
"eval_recite_exact_match": 0.025, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.5778443113772455e-05, |
|
"loss": 0.2129, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.5449101796407184e-05, |
|
"loss": 0.1559, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 1.5119760479041917e-05, |
|
"loss": 0.1631, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 1.4790419161676647e-05, |
|
"loss": 0.1614, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 1.4461077844311378e-05, |
|
"loss": 0.1636, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 1.4131736526946109e-05, |
|
"loss": 0.1645, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 1.3802395209580839e-05, |
|
"loss": 0.1613, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 1.3473053892215568e-05, |
|
"loss": 0.1724, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.31437125748503e-05, |
|
"loss": 0.1643, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 1.281437125748503e-05, |
|
"loss": 0.17, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6964456035767511, |
|
"eval_loss": 0.44896385073661804, |
|
"eval_runtime": 6.3718, |
|
"eval_samples_per_second": 31.389, |
|
"eval_steps_per_second": 2.04, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 15.0, |
|
"eval_f1": 21.070238095238086, |
|
"eval_qa_bleu": 11.294234950233255, |
|
"eval_qa_exact_match": 0.13, |
|
"eval_recite_bleu": 35.4728355349377, |
|
"eval_recite_exact_match": 0.025, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.2485029940119762e-05, |
|
"loss": 0.1441, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.2155688622754491e-05, |
|
"loss": 0.1356, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.182634730538922e-05, |
|
"loss": 0.1363, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.1497005988023954e-05, |
|
"loss": 0.137, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.1167664670658683e-05, |
|
"loss": 0.1382, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.0838323353293413e-05, |
|
"loss": 0.1416, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.0508982035928144e-05, |
|
"loss": 0.1389, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.0179640718562873e-05, |
|
"loss": 0.1392, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 9.850299401197606e-06, |
|
"loss": 0.1398, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 9.520958083832336e-06, |
|
"loss": 0.134, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.697876304023845, |
|
"eval_loss": 0.43685418367385864, |
|
"eval_runtime": 6.2539, |
|
"eval_samples_per_second": 31.98, |
|
"eval_steps_per_second": 2.079, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 16.5, |
|
"eval_f1": 22.119444444444444, |
|
"eval_qa_bleu": 13.989667861778496, |
|
"eval_qa_exact_match": 0.14, |
|
"eval_recite_bleu": 42.188868266934165, |
|
"eval_recite_exact_match": 0.04, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 9.191616766467065e-06, |
|
"loss": 0.1188, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 8.862275449101796e-06, |
|
"loss": 0.1207, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 8.532934131736528e-06, |
|
"loss": 0.1223, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 8.203592814371257e-06, |
|
"loss": 0.115, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 7.874251497005988e-06, |
|
"loss": 0.1139, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 7.544910179640718e-06, |
|
"loss": 0.117, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 7.215568862275449e-06, |
|
"loss": 0.1203, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.8862275449101795e-06, |
|
"loss": 0.1229, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 6.556886227544911e-06, |
|
"loss": 0.1206, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6986959761549926, |
|
"eval_loss": 0.43722403049468994, |
|
"eval_runtime": 6.191, |
|
"eval_samples_per_second": 32.305, |
|
"eval_steps_per_second": 2.1, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 25.83571428571428, |
|
"eval_qa_bleu": 8.333490045944334, |
|
"eval_qa_exact_match": 0.18, |
|
"eval_recite_bleu": 43.68492978075338, |
|
"eval_recite_exact_match": 0.065, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 6.22754491017964e-06, |
|
"loss": 0.1145, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 5.898203592814371e-06, |
|
"loss": 0.1066, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 5.568862275449102e-06, |
|
"loss": 0.1064, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 5.239520958083833e-06, |
|
"loss": 0.1077, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 4.9101796407185625e-06, |
|
"loss": 0.1108, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 4.580838323353294e-06, |
|
"loss": 0.1078, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 4.251497005988024e-06, |
|
"loss": 0.1072, |
|
"step": 1826 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 3.922155688622755e-06, |
|
"loss": 0.1098, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 3.592814371257485e-06, |
|
"loss": 0.1093, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 3.2634730538922155e-06, |
|
"loss": 0.1081, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.698725782414307, |
|
"eval_loss": 0.44227516651153564, |
|
"eval_runtime": 6.2856, |
|
"eval_samples_per_second": 31.819, |
|
"eval_steps_per_second": 2.068, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 17.5, |
|
"eval_f1": 22.30357142857143, |
|
"eval_qa_bleu": 10.817736383091892, |
|
"eval_qa_exact_match": 0.15, |
|
"eval_recite_bleu": 43.37666725316897, |
|
"eval_recite_exact_match": 0.06, |
|
"step": 1899 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 2.9341317365269463e-06, |
|
"loss": 0.1079, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 2.6047904191616767e-06, |
|
"loss": 0.102, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 2.2754491017964075e-06, |
|
"loss": 0.1003, |
|
"step": 1958 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 1.9461077844311374e-06, |
|
"loss": 0.1023, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 1.6167664670658684e-06, |
|
"loss": 0.1003, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 1.287425149700599e-06, |
|
"loss": 0.1058, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 9.580838323353293e-07, |
|
"loss": 0.1067, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 6.287425149700599e-07, |
|
"loss": 0.1047, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 2.994011976047904e-07, |
|
"loss": 0.1053, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6988971684053651, |
|
"eval_loss": 0.4435840845108032, |
|
"eval_runtime": 6.3653, |
|
"eval_samples_per_second": 31.42, |
|
"eval_steps_per_second": 2.042, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 20.5, |
|
"eval_f1": 25.95833333333333, |
|
"eval_qa_bleu": 12.082719988904218, |
|
"eval_qa_exact_match": 0.18, |
|
"eval_recite_bleu": 44.74289478974747, |
|
"eval_recite_exact_match": 0.075, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2110, |
|
"total_flos": 9.64020042848256e+16, |
|
"train_loss": 0.29445283661521443, |
|
"train_runtime": 3197.9615, |
|
"train_samples_per_second": 10.547, |
|
"train_steps_per_second": 0.66 |
|
} |
|
], |
|
"logging_steps": 22, |
|
"max_steps": 2110, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 9.64020042848256e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|