{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0432038343402977, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 2.2876, "step": 5 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 2.2895, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 2.3244, "step": 15 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.3113, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.5e-05, "loss": 2.276, "step": 25 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 2.2575, "step": 30 }, { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 2.261, "step": 35 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 2.3032, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.5e-05, "loss": 2.2794, "step": 45 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.249, "step": 50 }, { "epoch": 0.0, "learning_rate": 5.500000000000001e-05, "loss": 2.1937, "step": 55 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 2.2017, "step": 60 }, { "epoch": 0.0, "learning_rate": 6.500000000000001e-05, "loss": 2.2269, "step": 65 }, { "epoch": 0.0, "learning_rate": 7e-05, "loss": 2.1804, "step": 70 }, { "epoch": 0.0, "learning_rate": 7.500000000000001e-05, "loss": 2.1651, "step": 75 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 2.1852, "step": 80 }, { "epoch": 0.0, "learning_rate": 8.5e-05, "loss": 2.1501, "step": 85 }, { "epoch": 0.0, "learning_rate": 9e-05, "loss": 2.1714, "step": 90 }, { "epoch": 0.0, "learning_rate": 9.5e-05, "loss": 2.1539, "step": 95 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 2.1446, "step": 100 }, { "epoch": 0.0, "learning_rate": 9.99999998198986e-05, "loss": 2.1057, "step": 105 }, { "epoch": 0.0, "learning_rate": 9.999999927959438e-05, "loss": 2.118, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.999999837908737e-05, "loss": 2.0912, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.999999711837756e-05, "loss": 2.1721, "step": 120 }, { "epoch": 0.0, "learning_rate": 9.999999549746495e-05, "loss": 2.0931, "step": 125 }, { "epoch": 0.0, "learning_rate": 9.999999351634958e-05, "loss": 2.1149, "step": 130 }, { "epoch": 0.0, "learning_rate": 9.999999117503144e-05, "loss": 2.0848, "step": 135 }, { "epoch": 0.0, "learning_rate": 9.999998847351057e-05, "loss": 2.0944, "step": 140 }, { "epoch": 0.0, "learning_rate": 9.999998541178696e-05, "loss": 2.0992, "step": 145 }, { "epoch": 0.0, "learning_rate": 9.999998198986065e-05, "loss": 2.0869, "step": 150 }, { "epoch": 0.0, "learning_rate": 9.999997820773166e-05, "loss": 2.0742, "step": 155 }, { "epoch": 0.0, "learning_rate": 9.999997406540001e-05, "loss": 2.0755, "step": 160 }, { "epoch": 0.0, "learning_rate": 9.999996956286575e-05, "loss": 2.0836, "step": 165 }, { "epoch": 0.0, "learning_rate": 9.999996470012889e-05, "loss": 2.0635, "step": 170 }, { "epoch": 0.0, "learning_rate": 9.999995947718947e-05, "loss": 2.0706, "step": 175 }, { "epoch": 0.0, "learning_rate": 9.999995389404755e-05, "loss": 2.0848, "step": 180 }, { "epoch": 0.0, "learning_rate": 9.999994795070315e-05, "loss": 2.0895, "step": 185 }, { "epoch": 0.0, "learning_rate": 9.999994164715632e-05, "loss": 2.0431, "step": 190 }, { "epoch": 0.0, "learning_rate": 9.999993498340709e-05, "loss": 2.0429, "step": 195 }, { "epoch": 0.0, "learning_rate": 9.999992795945554e-05, "loss": 2.0925, "step": 200 }, { "epoch": 0.0, "learning_rate": 9.999992057530167e-05, "loss": 2.0145, "step": 205 }, { "epoch": 0.0, "learning_rate": 9.999991283094559e-05, "loss": 1.9805, "step": 210 }, { "epoch": 0.0, "learning_rate": 9.999990472638732e-05, "loss": 2.0314, "step": 215 }, { "epoch": 0.0, "learning_rate": 9.999989626162693e-05, "loss": 2.0925, "step": 220 }, { "epoch": 0.0, "learning_rate": 9.999988743666448e-05, "loss": 2.0491, "step": 225 }, { "epoch": 0.0, "learning_rate": 9.999987825150001e-05, "loss": 2.0545, "step": 230 }, { "epoch": 0.0, "learning_rate": 9.999986870613364e-05, "loss": 2.0939, "step": 235 }, { "epoch": 0.0, "learning_rate": 9.999985880056539e-05, "loss": 2.0622, "step": 240 }, { "epoch": 0.0, "learning_rate": 9.999984853479535e-05, "loss": 2.032, "step": 245 }, { "epoch": 0.0, "learning_rate": 9.999983790882359e-05, "loss": 2.0223, "step": 250 }, { "epoch": 0.0, "learning_rate": 9.99998269226502e-05, "loss": 2.0005, "step": 255 }, { "epoch": 0.0, "learning_rate": 9.999981557627525e-05, "loss": 2.0248, "step": 260 }, { "epoch": 0.0, "learning_rate": 9.999980386969881e-05, "loss": 1.9435, "step": 265 }, { "epoch": 0.0, "learning_rate": 9.999979180292098e-05, "loss": 2.0549, "step": 270 }, { "epoch": 0.0, "learning_rate": 9.999977937594182e-05, "loss": 1.9996, "step": 275 }, { "epoch": 0.0, "learning_rate": 9.999976658876147e-05, "loss": 1.9798, "step": 280 }, { "epoch": 0.0, "learning_rate": 9.999975344137997e-05, "loss": 2.0018, "step": 285 }, { "epoch": 0.0, "learning_rate": 9.999973993379745e-05, "loss": 2.0488, "step": 290 }, { "epoch": 0.0, "learning_rate": 9.9999726066014e-05, "loss": 2.0399, "step": 295 }, { "epoch": 0.0, "learning_rate": 9.999971183802971e-05, "loss": 2.002, "step": 300 }, { "epoch": 0.0, "learning_rate": 9.999969724984468e-05, "loss": 2.0086, "step": 305 }, { "epoch": 0.0, "learning_rate": 9.999968230145904e-05, "loss": 1.9796, "step": 310 }, { "epoch": 0.0, "learning_rate": 9.999966699287287e-05, "loss": 1.9596, "step": 315 }, { "epoch": 0.0, "learning_rate": 9.999965132408629e-05, "loss": 2.0483, "step": 320 }, { "epoch": 0.0, "learning_rate": 9.999963529509941e-05, "loss": 2.0204, "step": 325 }, { "epoch": 0.0, "learning_rate": 9.999961890591233e-05, "loss": 1.979, "step": 330 }, { "epoch": 0.0, "learning_rate": 9.999960215652522e-05, "loss": 1.96, "step": 335 }, { "epoch": 0.0, "learning_rate": 9.999958504693816e-05, "loss": 1.9285, "step": 340 }, { "epoch": 0.0, "learning_rate": 9.999956757715128e-05, "loss": 2.0282, "step": 345 }, { "epoch": 0.0, "learning_rate": 9.99995497471647e-05, "loss": 1.9838, "step": 350 }, { "epoch": 0.0, "learning_rate": 9.999953155697856e-05, "loss": 1.9749, "step": 355 }, { "epoch": 0.0, "learning_rate": 9.999951300659298e-05, "loss": 1.973, "step": 360 }, { "epoch": 0.0, "learning_rate": 9.999949409600809e-05, "loss": 1.993, "step": 365 }, { "epoch": 0.0, "learning_rate": 9.999947482522405e-05, "loss": 1.9667, "step": 370 }, { "epoch": 0.0, "learning_rate": 9.999945519424099e-05, "loss": 2.0723, "step": 375 }, { "epoch": 0.0, "learning_rate": 9.999943520305903e-05, "loss": 2.0068, "step": 380 }, { "epoch": 0.0, "learning_rate": 9.999941485167834e-05, "loss": 1.9967, "step": 385 }, { "epoch": 0.0, "learning_rate": 9.999939414009907e-05, "loss": 2.0461, "step": 390 }, { "epoch": 0.0, "learning_rate": 9.999937306832134e-05, "loss": 2.0337, "step": 395 }, { "epoch": 0.0, "learning_rate": 9.999935163634532e-05, "loss": 1.9315, "step": 400 }, { "epoch": 0.0, "learning_rate": 9.999932984417116e-05, "loss": 1.9961, "step": 405 }, { "epoch": 0.0, "learning_rate": 9.999930769179902e-05, "loss": 1.9762, "step": 410 }, { "epoch": 0.0, "learning_rate": 9.999928517922906e-05, "loss": 1.9288, "step": 415 }, { "epoch": 0.0, "learning_rate": 9.999926230646144e-05, "loss": 1.9719, "step": 420 }, { "epoch": 0.0, "learning_rate": 9.999923907349633e-05, "loss": 1.9997, "step": 425 }, { "epoch": 0.0, "learning_rate": 9.99992154803339e-05, "loss": 2.0214, "step": 430 }, { "epoch": 0.0, "learning_rate": 9.99991915269743e-05, "loss": 1.9812, "step": 435 }, { "epoch": 0.0, "learning_rate": 9.999916721341772e-05, "loss": 2.0372, "step": 440 }, { "epoch": 0.0, "learning_rate": 9.999914253966432e-05, "loss": 2.0219, "step": 445 }, { "epoch": 0.0, "learning_rate": 9.999911750571431e-05, "loss": 2.0017, "step": 450 }, { "epoch": 0.0, "learning_rate": 9.999909211156783e-05, "loss": 2.0158, "step": 455 }, { "epoch": 0.0, "learning_rate": 9.99990663572251e-05, "loss": 2.0055, "step": 460 }, { "epoch": 0.0, "learning_rate": 9.999904024268627e-05, "loss": 1.9285, "step": 465 }, { "epoch": 0.0, "learning_rate": 9.999901376795156e-05, "loss": 1.969, "step": 470 }, { "epoch": 0.0, "learning_rate": 9.999898693302113e-05, "loss": 1.9541, "step": 475 }, { "epoch": 0.0, "learning_rate": 9.99989597378952e-05, "loss": 1.9871, "step": 480 }, { "epoch": 0.0, "learning_rate": 9.999893218257394e-05, "loss": 2.0086, "step": 485 }, { "epoch": 0.0, "learning_rate": 9.99989042670576e-05, "loss": 1.9881, "step": 490 }, { "epoch": 0.0, "learning_rate": 9.999887599134632e-05, "loss": 2.0128, "step": 495 }, { "epoch": 0.0, "learning_rate": 9.999884735544032e-05, "loss": 2.041, "step": 500 }, { "epoch": 0.0, "learning_rate": 9.999881835933983e-05, "loss": 1.8518, "step": 505 }, { "epoch": 0.0, "learning_rate": 9.999878900304504e-05, "loss": 1.9979, "step": 510 }, { "epoch": 0.0, "learning_rate": 9.999875928655617e-05, "loss": 1.9671, "step": 515 }, { "epoch": 0.0, "learning_rate": 9.999872920987342e-05, "loss": 2.0128, "step": 520 }, { "epoch": 0.0, "learning_rate": 9.999869877299703e-05, "loss": 1.9579, "step": 525 }, { "epoch": 0.0, "learning_rate": 9.999866797592719e-05, "loss": 1.9607, "step": 530 }, { "epoch": 0.0, "learning_rate": 9.999863681866415e-05, "loss": 1.9498, "step": 535 }, { "epoch": 0.0, "learning_rate": 9.999860530120811e-05, "loss": 1.9639, "step": 540 }, { "epoch": 0.0, "learning_rate": 9.999857342355931e-05, "loss": 1.9938, "step": 545 }, { "epoch": 0.0, "learning_rate": 9.999854118571798e-05, "loss": 1.934, "step": 550 }, { "epoch": 0.0, "learning_rate": 9.999850858768436e-05, "loss": 2.0115, "step": 555 }, { "epoch": 0.0, "learning_rate": 9.999847562945867e-05, "loss": 1.9693, "step": 560 }, { "epoch": 0.0, "learning_rate": 9.999844231104116e-05, "loss": 2.0074, "step": 565 }, { "epoch": 0.0, "learning_rate": 9.999840863243205e-05, "loss": 1.9344, "step": 570 }, { "epoch": 0.0, "learning_rate": 9.99983745936316e-05, "loss": 1.9331, "step": 575 }, { "epoch": 0.0, "learning_rate": 9.999834019464005e-05, "loss": 1.9767, "step": 580 }, { "epoch": 0.0, "learning_rate": 9.999830543545765e-05, "loss": 1.9051, "step": 585 }, { "epoch": 0.0, "learning_rate": 9.999827031608466e-05, "loss": 1.9081, "step": 590 }, { "epoch": 0.0, "learning_rate": 9.999823483652131e-05, "loss": 1.8921, "step": 595 }, { "epoch": 0.0, "learning_rate": 9.999819899676786e-05, "loss": 1.9202, "step": 600 }, { "epoch": 0.0, "learning_rate": 9.999816279682459e-05, "loss": 1.9765, "step": 605 }, { "epoch": 0.0, "learning_rate": 9.999812623669174e-05, "loss": 1.9036, "step": 610 }, { "epoch": 0.0, "learning_rate": 9.99980893163696e-05, "loss": 1.9576, "step": 615 }, { "epoch": 0.0, "learning_rate": 9.99980520358584e-05, "loss": 1.9217, "step": 620 }, { "epoch": 0.0, "learning_rate": 9.999801439515842e-05, "loss": 2.0218, "step": 625 }, { "epoch": 0.0, "learning_rate": 9.999797639426993e-05, "loss": 1.9602, "step": 630 }, { "epoch": 0.0, "learning_rate": 9.999793803319322e-05, "loss": 1.9604, "step": 635 }, { "epoch": 0.0, "learning_rate": 9.999789931192855e-05, "loss": 2.0027, "step": 640 }, { "epoch": 0.0, "learning_rate": 9.999786023047622e-05, "loss": 1.9514, "step": 645 }, { "epoch": 0.0, "learning_rate": 9.999782078883649e-05, "loss": 1.9172, "step": 650 }, { "epoch": 0.0, "learning_rate": 9.999778098700963e-05, "loss": 1.9848, "step": 655 }, { "epoch": 0.0, "learning_rate": 9.999774082499596e-05, "loss": 1.975, "step": 660 }, { "epoch": 0.0, "learning_rate": 9.999770030279576e-05, "loss": 1.9909, "step": 665 }, { "epoch": 0.0, "learning_rate": 9.999765942040933e-05, "loss": 1.9592, "step": 670 }, { "epoch": 0.0, "learning_rate": 9.999761817783693e-05, "loss": 1.942, "step": 675 }, { "epoch": 0.0, "learning_rate": 9.99975765750789e-05, "loss": 1.8839, "step": 680 }, { "epoch": 0.0, "learning_rate": 9.999753461213551e-05, "loss": 1.9796, "step": 685 }, { "epoch": 0.0, "learning_rate": 9.999749228900708e-05, "loss": 1.9665, "step": 690 }, { "epoch": 0.0, "learning_rate": 9.999744960569389e-05, "loss": 2.003, "step": 695 }, { "epoch": 0.0, "learning_rate": 9.999740656219629e-05, "loss": 1.9, "step": 700 }, { "epoch": 0.0, "learning_rate": 9.999736315851453e-05, "loss": 1.8498, "step": 705 }, { "epoch": 0.0, "learning_rate": 9.999731939464898e-05, "loss": 1.9311, "step": 710 }, { "epoch": 0.0, "learning_rate": 9.999727527059995e-05, "loss": 1.9838, "step": 715 }, { "epoch": 0.0, "learning_rate": 9.999723078636772e-05, "loss": 1.978, "step": 720 }, { "epoch": 0.0, "learning_rate": 9.999718594195262e-05, "loss": 1.9692, "step": 725 }, { "epoch": 0.0, "learning_rate": 9.9997140737355e-05, "loss": 1.9759, "step": 730 }, { "epoch": 0.0, "learning_rate": 9.999709517257515e-05, "loss": 2.0019, "step": 735 }, { "epoch": 0.0, "learning_rate": 9.999704924761345e-05, "loss": 1.9343, "step": 740 }, { "epoch": 0.0, "learning_rate": 9.999700296247017e-05, "loss": 1.897, "step": 745 }, { "epoch": 0.0, "learning_rate": 9.999695631714568e-05, "loss": 1.968, "step": 750 }, { "epoch": 0.0, "learning_rate": 9.999690931164032e-05, "loss": 1.9974, "step": 755 }, { "epoch": 0.0, "learning_rate": 9.999686194595441e-05, "loss": 1.9236, "step": 760 }, { "epoch": 0.0, "learning_rate": 9.999681422008828e-05, "loss": 1.9327, "step": 765 }, { "epoch": 0.0, "learning_rate": 9.999676613404233e-05, "loss": 1.91, "step": 770 }, { "epoch": 0.0, "learning_rate": 9.999671768781685e-05, "loss": 1.9407, "step": 775 }, { "epoch": 0.0, "learning_rate": 9.99966688814122e-05, "loss": 1.9713, "step": 780 }, { "epoch": 0.0, "learning_rate": 9.999661971482874e-05, "loss": 1.9069, "step": 785 }, { "epoch": 0.0, "learning_rate": 9.999657018806683e-05, "loss": 1.9184, "step": 790 }, { "epoch": 0.0, "learning_rate": 9.99965203011268e-05, "loss": 1.8443, "step": 795 }, { "epoch": 0.0, "learning_rate": 9.999647005400905e-05, "loss": 1.9208, "step": 800 }, { "epoch": 0.0, "learning_rate": 9.999641944671392e-05, "loss": 1.9233, "step": 805 }, { "epoch": 0.0, "learning_rate": 9.999636847924177e-05, "loss": 1.9092, "step": 810 }, { "epoch": 0.0, "learning_rate": 9.999631715159299e-05, "loss": 1.9216, "step": 815 }, { "epoch": 0.0, "learning_rate": 9.999626546376791e-05, "loss": 1.9448, "step": 820 }, { "epoch": 0.0, "learning_rate": 9.999621341576692e-05, "loss": 1.9197, "step": 825 }, { "epoch": 0.0, "learning_rate": 9.999616100759042e-05, "loss": 1.9171, "step": 830 }, { "epoch": 0.0, "learning_rate": 9.999610823923878e-05, "loss": 1.9982, "step": 835 }, { "epoch": 0.0, "learning_rate": 9.999605511071234e-05, "loss": 1.9602, "step": 840 }, { "epoch": 0.0, "learning_rate": 9.999600162201152e-05, "loss": 1.9217, "step": 845 }, { "epoch": 0.0, "learning_rate": 9.99959477731367e-05, "loss": 1.9001, "step": 850 }, { "epoch": 0.0, "learning_rate": 9.999589356408825e-05, "loss": 1.9441, "step": 855 }, { "epoch": 0.0, "learning_rate": 9.999583899486659e-05, "loss": 1.8922, "step": 860 }, { "epoch": 0.0, "learning_rate": 9.999578406547209e-05, "loss": 1.9603, "step": 865 }, { "epoch": 0.0, "learning_rate": 9.999572877590515e-05, "loss": 1.9526, "step": 870 }, { "epoch": 0.0, "learning_rate": 9.999567312616618e-05, "loss": 1.9902, "step": 875 }, { "epoch": 0.0, "learning_rate": 9.999561711625557e-05, "loss": 1.9923, "step": 880 }, { "epoch": 0.0, "learning_rate": 9.999556074617372e-05, "loss": 1.9565, "step": 885 }, { "epoch": 0.0, "learning_rate": 9.999550401592105e-05, "loss": 1.9546, "step": 890 }, { "epoch": 0.0, "learning_rate": 9.999544692549796e-05, "loss": 1.91, "step": 895 }, { "epoch": 0.0, "learning_rate": 9.999538947490485e-05, "loss": 1.9388, "step": 900 }, { "epoch": 0.0, "learning_rate": 9.999533166414216e-05, "loss": 1.8605, "step": 905 }, { "epoch": 0.0, "learning_rate": 9.999527349321029e-05, "loss": 1.8899, "step": 910 }, { "epoch": 0.0, "learning_rate": 9.999521496210966e-05, "loss": 1.8925, "step": 915 }, { "epoch": 0.0, "learning_rate": 9.999515607084069e-05, "loss": 1.9415, "step": 920 }, { "epoch": 0.0, "learning_rate": 9.99950968194038e-05, "loss": 1.9416, "step": 925 }, { "epoch": 0.01, "learning_rate": 9.999503720779943e-05, "loss": 1.902, "step": 930 }, { "epoch": 0.01, "learning_rate": 9.999497723602803e-05, "loss": 1.9278, "step": 935 }, { "epoch": 0.01, "learning_rate": 9.999491690408997e-05, "loss": 2.0246, "step": 940 }, { "epoch": 0.01, "learning_rate": 9.999485621198573e-05, "loss": 1.8807, "step": 945 }, { "epoch": 0.01, "learning_rate": 9.999479515971575e-05, "loss": 1.909, "step": 950 }, { "epoch": 0.01, "learning_rate": 9.999473374728043e-05, "loss": 1.9355, "step": 955 }, { "epoch": 0.01, "learning_rate": 9.999467197468026e-05, "loss": 1.8848, "step": 960 }, { "epoch": 0.01, "learning_rate": 9.999460984191566e-05, "loss": 1.9468, "step": 965 }, { "epoch": 0.01, "learning_rate": 9.99945473489871e-05, "loss": 1.911, "step": 970 }, { "epoch": 0.01, "learning_rate": 9.999448449589499e-05, "loss": 1.8453, "step": 975 }, { "epoch": 0.01, "learning_rate": 9.999442128263981e-05, "loss": 1.9039, "step": 980 }, { "epoch": 0.01, "learning_rate": 9.9994357709222e-05, "loss": 1.9046, "step": 985 }, { "epoch": 0.01, "learning_rate": 9.999429377564206e-05, "loss": 1.9086, "step": 990 }, { "epoch": 0.01, "learning_rate": 9.99942294819004e-05, "loss": 1.8607, "step": 995 }, { "epoch": 0.01, "learning_rate": 9.99941648279975e-05, "loss": 1.9031, "step": 1000 }, { "epoch": 0.01, "learning_rate": 9.999409981393384e-05, "loss": 1.91, "step": 1005 }, { "epoch": 0.01, "learning_rate": 9.999403443970988e-05, "loss": 1.9401, "step": 1010 }, { "epoch": 0.01, "learning_rate": 9.999396870532605e-05, "loss": 1.9046, "step": 1015 }, { "epoch": 0.01, "learning_rate": 9.99939026107829e-05, "loss": 1.915, "step": 1020 }, { "epoch": 0.01, "learning_rate": 9.999383615608085e-05, "loss": 1.9607, "step": 1025 }, { "epoch": 0.01, "learning_rate": 9.99937693412204e-05, "loss": 1.9254, "step": 1030 }, { "epoch": 0.01, "learning_rate": 9.999370216620203e-05, "loss": 1.9682, "step": 1035 }, { "epoch": 0.01, "learning_rate": 9.999363463102623e-05, "loss": 1.9092, "step": 1040 }, { "epoch": 0.01, "learning_rate": 9.999356673569345e-05, "loss": 1.8798, "step": 1045 }, { "epoch": 0.01, "learning_rate": 9.999349848020423e-05, "loss": 1.9795, "step": 1050 }, { "epoch": 0.01, "learning_rate": 9.999342986455904e-05, "loss": 1.9162, "step": 1055 }, { "epoch": 0.01, "learning_rate": 9.999336088875836e-05, "loss": 1.9005, "step": 1060 }, { "epoch": 0.01, "learning_rate": 9.999329155280271e-05, "loss": 1.9429, "step": 1065 }, { "epoch": 0.01, "learning_rate": 9.999322185669258e-05, "loss": 1.9094, "step": 1070 }, { "epoch": 0.01, "learning_rate": 9.999315180042846e-05, "loss": 1.9057, "step": 1075 }, { "epoch": 0.01, "learning_rate": 9.999308138401086e-05, "loss": 1.8925, "step": 1080 }, { "epoch": 0.01, "learning_rate": 9.99930106074403e-05, "loss": 1.9216, "step": 1085 }, { "epoch": 0.01, "learning_rate": 9.999293947071731e-05, "loss": 1.956, "step": 1090 }, { "epoch": 0.01, "learning_rate": 9.999286797384235e-05, "loss": 1.8963, "step": 1095 }, { "epoch": 0.01, "learning_rate": 9.999279611681596e-05, "loss": 1.9205, "step": 1100 }, { "epoch": 0.01, "learning_rate": 9.999272389963868e-05, "loss": 1.875, "step": 1105 }, { "epoch": 0.01, "learning_rate": 9.999265132231099e-05, "loss": 1.8996, "step": 1110 }, { "epoch": 0.01, "learning_rate": 9.999257838483342e-05, "loss": 1.9865, "step": 1115 }, { "epoch": 0.01, "learning_rate": 9.999250508720652e-05, "loss": 1.9096, "step": 1120 }, { "epoch": 0.01, "learning_rate": 9.999243142943082e-05, "loss": 1.8916, "step": 1125 }, { "epoch": 0.01, "learning_rate": 9.999235741150682e-05, "loss": 1.881, "step": 1130 }, { "epoch": 0.01, "learning_rate": 9.999228303343507e-05, "loss": 1.9408, "step": 1135 }, { "epoch": 0.01, "learning_rate": 9.999220829521612e-05, "loss": 1.906, "step": 1140 }, { "epoch": 0.01, "learning_rate": 9.999213319685047e-05, "loss": 1.908, "step": 1145 }, { "epoch": 0.01, "learning_rate": 9.99920577383387e-05, "loss": 1.8456, "step": 1150 }, { "epoch": 0.01, "learning_rate": 9.999198191968133e-05, "loss": 1.8625, "step": 1155 }, { "epoch": 0.01, "learning_rate": 9.999190574087892e-05, "loss": 1.9613, "step": 1160 }, { "epoch": 0.01, "learning_rate": 9.9991829201932e-05, "loss": 1.9402, "step": 1165 }, { "epoch": 0.01, "learning_rate": 9.999175230284114e-05, "loss": 1.9587, "step": 1170 }, { "epoch": 0.01, "learning_rate": 9.99916750436069e-05, "loss": 1.8951, "step": 1175 }, { "epoch": 0.01, "learning_rate": 9.999159742422981e-05, "loss": 1.9246, "step": 1180 }, { "epoch": 0.01, "learning_rate": 9.999151944471046e-05, "loss": 1.9317, "step": 1185 }, { "epoch": 0.01, "learning_rate": 9.999144110504939e-05, "loss": 1.8751, "step": 1190 }, { "epoch": 0.01, "learning_rate": 9.999136240524715e-05, "loss": 1.9413, "step": 1195 }, { "epoch": 0.01, "learning_rate": 9.999128334530434e-05, "loss": 1.8909, "step": 1200 }, { "epoch": 0.01, "learning_rate": 9.999120392522152e-05, "loss": 1.8498, "step": 1205 }, { "epoch": 0.01, "learning_rate": 9.999112414499927e-05, "loss": 1.8723, "step": 1210 }, { "epoch": 0.01, "learning_rate": 9.999104400463815e-05, "loss": 1.865, "step": 1215 }, { "epoch": 0.01, "learning_rate": 9.999096350413872e-05, "loss": 1.9007, "step": 1220 }, { "epoch": 0.01, "learning_rate": 9.99908826435016e-05, "loss": 1.96, "step": 1225 }, { "epoch": 0.01, "learning_rate": 9.999080142272734e-05, "loss": 1.8594, "step": 1230 }, { "epoch": 0.01, "learning_rate": 9.999071984181655e-05, "loss": 1.8957, "step": 1235 }, { "epoch": 0.01, "learning_rate": 9.99906379007698e-05, "loss": 1.8842, "step": 1240 }, { "epoch": 0.01, "learning_rate": 9.99905555995877e-05, "loss": 1.917, "step": 1245 }, { "epoch": 0.01, "learning_rate": 9.99904729382708e-05, "loss": 1.858, "step": 1250 }, { "epoch": 0.01, "learning_rate": 9.999038991681975e-05, "loss": 1.8365, "step": 1255 }, { "epoch": 0.01, "learning_rate": 9.999030653523511e-05, "loss": 1.9336, "step": 1260 }, { "epoch": 0.01, "learning_rate": 9.99902227935175e-05, "loss": 1.9142, "step": 1265 }, { "epoch": 0.01, "learning_rate": 9.999013869166752e-05, "loss": 1.961, "step": 1270 }, { "epoch": 0.01, "learning_rate": 9.999005422968577e-05, "loss": 1.9193, "step": 1275 }, { "epoch": 0.01, "learning_rate": 9.998996940757286e-05, "loss": 1.9087, "step": 1280 }, { "epoch": 0.01, "learning_rate": 9.99898842253294e-05, "loss": 1.8651, "step": 1285 }, { "epoch": 0.01, "learning_rate": 9.998979868295602e-05, "loss": 1.8743, "step": 1290 }, { "epoch": 0.01, "learning_rate": 9.998971278045331e-05, "loss": 1.9741, "step": 1295 }, { "epoch": 0.01, "learning_rate": 9.998962651782188e-05, "loss": 1.9677, "step": 1300 }, { "epoch": 0.01, "learning_rate": 9.998953989506241e-05, "loss": 1.8938, "step": 1305 }, { "epoch": 0.01, "learning_rate": 9.998945291217548e-05, "loss": 1.9325, "step": 1310 }, { "epoch": 0.01, "learning_rate": 9.99893655691617e-05, "loss": 1.8945, "step": 1315 }, { "epoch": 0.01, "learning_rate": 9.998927786602173e-05, "loss": 1.9258, "step": 1320 }, { "epoch": 0.01, "learning_rate": 9.99891898027562e-05, "loss": 1.843, "step": 1325 }, { "epoch": 0.01, "learning_rate": 9.998910137936575e-05, "loss": 1.8947, "step": 1330 }, { "epoch": 0.01, "learning_rate": 9.998901259585098e-05, "loss": 1.9495, "step": 1335 }, { "epoch": 0.01, "learning_rate": 9.998892345221258e-05, "loss": 1.8783, "step": 1340 }, { "epoch": 0.01, "learning_rate": 9.998883394845115e-05, "loss": 1.8467, "step": 1345 }, { "epoch": 0.01, "learning_rate": 9.998874408456738e-05, "loss": 1.8569, "step": 1350 }, { "epoch": 0.01, "learning_rate": 9.998865386056186e-05, "loss": 1.9579, "step": 1355 }, { "epoch": 0.01, "learning_rate": 9.998856327643528e-05, "loss": 1.9173, "step": 1360 }, { "epoch": 0.01, "learning_rate": 9.998847233218827e-05, "loss": 1.8705, "step": 1365 }, { "epoch": 0.01, "learning_rate": 9.998838102782151e-05, "loss": 1.8878, "step": 1370 }, { "epoch": 0.01, "learning_rate": 9.998828936333564e-05, "loss": 1.9285, "step": 1375 }, { "epoch": 0.01, "learning_rate": 9.998819733873134e-05, "loss": 1.8701, "step": 1380 }, { "epoch": 0.01, "learning_rate": 9.998810495400925e-05, "loss": 1.8906, "step": 1385 }, { "epoch": 0.01, "learning_rate": 9.998801220917003e-05, "loss": 1.8993, "step": 1390 }, { "epoch": 0.01, "learning_rate": 9.998791910421438e-05, "loss": 1.9558, "step": 1395 }, { "epoch": 0.01, "learning_rate": 9.998782563914295e-05, "loss": 1.9104, "step": 1400 }, { "epoch": 0.01, "learning_rate": 9.998773181395642e-05, "loss": 1.9224, "step": 1405 }, { "epoch": 0.01, "learning_rate": 9.998763762865545e-05, "loss": 1.9117, "step": 1410 }, { "epoch": 0.01, "learning_rate": 9.998754308324072e-05, "loss": 1.9125, "step": 1415 }, { "epoch": 0.01, "learning_rate": 9.998744817771295e-05, "loss": 1.9274, "step": 1420 }, { "epoch": 0.01, "learning_rate": 9.998735291207278e-05, "loss": 1.9036, "step": 1425 }, { "epoch": 0.01, "learning_rate": 9.99872572863209e-05, "loss": 1.9178, "step": 1430 }, { "epoch": 0.01, "learning_rate": 9.998716130045803e-05, "loss": 1.9142, "step": 1435 }, { "epoch": 0.01, "learning_rate": 9.998706495448484e-05, "loss": 1.8841, "step": 1440 }, { "epoch": 0.01, "learning_rate": 9.998696824840202e-05, "loss": 1.897, "step": 1445 }, { "epoch": 0.01, "learning_rate": 9.998687118221028e-05, "loss": 1.9204, "step": 1450 }, { "epoch": 0.01, "learning_rate": 9.99867737559103e-05, "loss": 1.8878, "step": 1455 }, { "epoch": 0.01, "learning_rate": 9.99866759695028e-05, "loss": 1.8745, "step": 1460 }, { "epoch": 0.01, "learning_rate": 9.998657782298849e-05, "loss": 1.9123, "step": 1465 }, { "epoch": 0.01, "learning_rate": 9.998647931636804e-05, "loss": 1.8695, "step": 1470 }, { "epoch": 0.01, "learning_rate": 9.998638044964221e-05, "loss": 1.8866, "step": 1475 }, { "epoch": 0.01, "learning_rate": 9.998628122281168e-05, "loss": 1.9119, "step": 1480 }, { "epoch": 0.01, "learning_rate": 9.998618163587716e-05, "loss": 1.8979, "step": 1485 }, { "epoch": 0.01, "learning_rate": 9.99860816888394e-05, "loss": 1.9406, "step": 1490 }, { "epoch": 0.01, "learning_rate": 9.998598138169908e-05, "loss": 1.957, "step": 1495 }, { "epoch": 0.01, "learning_rate": 9.998588071445696e-05, "loss": 1.9325, "step": 1500 }, { "epoch": 0.01, "learning_rate": 9.998577968711374e-05, "loss": 1.9145, "step": 1505 }, { "epoch": 0.01, "learning_rate": 9.998567829967015e-05, "loss": 1.893, "step": 1510 }, { "epoch": 0.01, "learning_rate": 9.998557655212692e-05, "loss": 1.8729, "step": 1515 }, { "epoch": 0.01, "learning_rate": 9.998547444448481e-05, "loss": 1.907, "step": 1520 }, { "epoch": 0.01, "learning_rate": 9.998537197674452e-05, "loss": 1.9196, "step": 1525 }, { "epoch": 0.01, "learning_rate": 9.99852691489068e-05, "loss": 1.7937, "step": 1530 }, { "epoch": 0.01, "learning_rate": 9.998516596097241e-05, "loss": 1.8703, "step": 1535 }, { "epoch": 0.01, "learning_rate": 9.998506241294206e-05, "loss": 1.8083, "step": 1540 }, { "epoch": 0.01, "learning_rate": 9.998495850481652e-05, "loss": 1.8889, "step": 1545 }, { "epoch": 0.01, "learning_rate": 9.998485423659652e-05, "loss": 1.9273, "step": 1550 }, { "epoch": 0.01, "learning_rate": 9.998474960828283e-05, "loss": 1.8356, "step": 1555 }, { "epoch": 0.01, "learning_rate": 9.99846446198762e-05, "loss": 1.8652, "step": 1560 }, { "epoch": 0.01, "learning_rate": 9.998453927137739e-05, "loss": 1.9383, "step": 1565 }, { "epoch": 0.01, "learning_rate": 9.998443356278712e-05, "loss": 1.88, "step": 1570 }, { "epoch": 0.01, "learning_rate": 9.998432749410623e-05, "loss": 1.8284, "step": 1575 }, { "epoch": 0.01, "learning_rate": 9.99842210653354e-05, "loss": 1.8584, "step": 1580 }, { "epoch": 0.01, "learning_rate": 9.998411427647545e-05, "loss": 1.8358, "step": 1585 }, { "epoch": 0.01, "learning_rate": 9.998400712752711e-05, "loss": 1.8513, "step": 1590 }, { "epoch": 0.01, "learning_rate": 9.99838996184912e-05, "loss": 1.8252, "step": 1595 }, { "epoch": 0.01, "learning_rate": 9.998379174936847e-05, "loss": 1.92, "step": 1600 }, { "epoch": 0.01, "learning_rate": 9.998368352015968e-05, "loss": 1.8791, "step": 1605 }, { "epoch": 0.01, "learning_rate": 9.998357493086563e-05, "loss": 1.9149, "step": 1610 }, { "epoch": 0.01, "learning_rate": 9.998346598148712e-05, "loss": 1.8426, "step": 1615 }, { "epoch": 0.01, "learning_rate": 9.998335667202489e-05, "loss": 1.8733, "step": 1620 }, { "epoch": 0.01, "learning_rate": 9.998324700247976e-05, "loss": 1.8456, "step": 1625 }, { "epoch": 0.01, "learning_rate": 9.998313697285252e-05, "loss": 1.8117, "step": 1630 }, { "epoch": 0.01, "learning_rate": 9.998302658314392e-05, "loss": 1.9482, "step": 1635 }, { "epoch": 0.01, "learning_rate": 9.998291583335482e-05, "loss": 1.8499, "step": 1640 }, { "epoch": 0.01, "learning_rate": 9.998280472348597e-05, "loss": 1.8609, "step": 1645 }, { "epoch": 0.01, "learning_rate": 9.99826932535382e-05, "loss": 1.8132, "step": 1650 }, { "epoch": 0.01, "learning_rate": 9.998258142351229e-05, "loss": 1.8419, "step": 1655 }, { "epoch": 0.01, "learning_rate": 9.998246923340907e-05, "loss": 1.8447, "step": 1660 }, { "epoch": 0.01, "learning_rate": 9.99823566832293e-05, "loss": 1.7931, "step": 1665 }, { "epoch": 0.01, "learning_rate": 9.998224377297386e-05, "loss": 1.8881, "step": 1670 }, { "epoch": 0.01, "learning_rate": 9.998213050264351e-05, "loss": 1.8717, "step": 1675 }, { "epoch": 0.01, "learning_rate": 9.99820168722391e-05, "loss": 1.8785, "step": 1680 }, { "epoch": 0.01, "learning_rate": 9.998190288176142e-05, "loss": 1.842, "step": 1685 }, { "epoch": 0.01, "learning_rate": 9.998178853121132e-05, "loss": 1.8572, "step": 1690 }, { "epoch": 0.01, "learning_rate": 9.998167382058958e-05, "loss": 1.8937, "step": 1695 }, { "epoch": 0.01, "learning_rate": 9.998155874989708e-05, "loss": 1.9485, "step": 1700 }, { "epoch": 0.01, "learning_rate": 9.998144331913462e-05, "loss": 1.847, "step": 1705 }, { "epoch": 0.01, "learning_rate": 9.998132752830303e-05, "loss": 1.9088, "step": 1710 }, { "epoch": 0.01, "learning_rate": 9.998121137740313e-05, "loss": 1.8807, "step": 1715 }, { "epoch": 0.01, "learning_rate": 9.99810948664358e-05, "loss": 1.914, "step": 1720 }, { "epoch": 0.01, "learning_rate": 9.998097799540184e-05, "loss": 1.8524, "step": 1725 }, { "epoch": 0.01, "learning_rate": 9.998086076430212e-05, "loss": 1.8592, "step": 1730 }, { "epoch": 0.01, "learning_rate": 9.998074317313747e-05, "loss": 1.842, "step": 1735 }, { "epoch": 0.01, "learning_rate": 9.998062522190873e-05, "loss": 1.8367, "step": 1740 }, { "epoch": 0.01, "learning_rate": 9.998050691061676e-05, "loss": 1.8962, "step": 1745 }, { "epoch": 0.01, "learning_rate": 9.998038823926241e-05, "loss": 1.893, "step": 1750 }, { "epoch": 0.01, "learning_rate": 9.998026920784654e-05, "loss": 1.8433, "step": 1755 }, { "epoch": 0.01, "learning_rate": 9.998014981637e-05, "loss": 1.8367, "step": 1760 }, { "epoch": 0.01, "learning_rate": 9.998003006483367e-05, "loss": 1.8803, "step": 1765 }, { "epoch": 0.01, "learning_rate": 9.997990995323838e-05, "loss": 1.8473, "step": 1770 }, { "epoch": 0.01, "learning_rate": 9.9979789481585e-05, "loss": 1.8628, "step": 1775 }, { "epoch": 0.01, "learning_rate": 9.997966864987443e-05, "loss": 1.8383, "step": 1780 }, { "epoch": 0.01, "learning_rate": 9.997954745810752e-05, "loss": 1.882, "step": 1785 }, { "epoch": 0.01, "learning_rate": 9.997942590628513e-05, "loss": 1.8854, "step": 1790 }, { "epoch": 0.01, "learning_rate": 9.997930399440815e-05, "loss": 1.8365, "step": 1795 }, { "epoch": 0.01, "learning_rate": 9.997918172247745e-05, "loss": 1.8736, "step": 1800 }, { "epoch": 0.01, "learning_rate": 9.997905909049393e-05, "loss": 1.9024, "step": 1805 }, { "epoch": 0.01, "learning_rate": 9.997893609845846e-05, "loss": 1.879, "step": 1810 }, { "epoch": 0.01, "learning_rate": 9.997881274637192e-05, "loss": 1.8945, "step": 1815 }, { "epoch": 0.01, "learning_rate": 9.997868903423522e-05, "loss": 1.8654, "step": 1820 }, { "epoch": 0.01, "learning_rate": 9.997856496204921e-05, "loss": 1.8604, "step": 1825 }, { "epoch": 0.01, "learning_rate": 9.997844052981483e-05, "loss": 1.8511, "step": 1830 }, { "epoch": 0.01, "learning_rate": 9.997831573753296e-05, "loss": 1.823, "step": 1835 }, { "epoch": 0.01, "learning_rate": 9.997819058520448e-05, "loss": 1.875, "step": 1840 }, { "epoch": 0.01, "learning_rate": 9.997806507283031e-05, "loss": 1.8417, "step": 1845 }, { "epoch": 0.01, "learning_rate": 9.997793920041138e-05, "loss": 1.9319, "step": 1850 }, { "epoch": 0.01, "learning_rate": 9.997781296794854e-05, "loss": 1.8542, "step": 1855 }, { "epoch": 0.01, "learning_rate": 9.997768637544274e-05, "loss": 1.9018, "step": 1860 }, { "epoch": 0.01, "learning_rate": 9.997755942289487e-05, "loss": 1.8523, "step": 1865 }, { "epoch": 0.01, "learning_rate": 9.997743211030585e-05, "loss": 1.8378, "step": 1870 }, { "epoch": 0.01, "learning_rate": 9.997730443767663e-05, "loss": 1.8341, "step": 1875 }, { "epoch": 0.01, "learning_rate": 9.997717640500808e-05, "loss": 1.9041, "step": 1880 }, { "epoch": 0.01, "learning_rate": 9.997704801230115e-05, "loss": 1.8193, "step": 1885 }, { "epoch": 0.01, "learning_rate": 9.997691925955675e-05, "loss": 1.8723, "step": 1890 }, { "epoch": 0.01, "learning_rate": 9.997679014677583e-05, "loss": 1.8305, "step": 1895 }, { "epoch": 0.01, "learning_rate": 9.997666067395929e-05, "loss": 1.804, "step": 1900 }, { "epoch": 0.01, "learning_rate": 9.99765308411081e-05, "loss": 1.9002, "step": 1905 }, { "epoch": 0.01, "learning_rate": 9.997640064822315e-05, "loss": 1.885, "step": 1910 }, { "epoch": 0.01, "learning_rate": 9.997627009530541e-05, "loss": 1.8439, "step": 1915 }, { "epoch": 0.01, "learning_rate": 9.997613918235582e-05, "loss": 1.8298, "step": 1920 }, { "epoch": 0.01, "learning_rate": 9.997600790937531e-05, "loss": 1.8605, "step": 1925 }, { "epoch": 0.01, "learning_rate": 9.997587627636484e-05, "loss": 1.9205, "step": 1930 }, { "epoch": 0.01, "learning_rate": 9.997574428332535e-05, "loss": 1.8524, "step": 1935 }, { "epoch": 0.01, "learning_rate": 9.997561193025779e-05, "loss": 1.8468, "step": 1940 }, { "epoch": 0.01, "learning_rate": 9.997547921716311e-05, "loss": 1.8043, "step": 1945 }, { "epoch": 0.01, "learning_rate": 9.997534614404227e-05, "loss": 1.7971, "step": 1950 }, { "epoch": 0.01, "learning_rate": 9.997521271089622e-05, "loss": 1.8675, "step": 1955 }, { "epoch": 0.01, "learning_rate": 9.997507891772596e-05, "loss": 1.927, "step": 1960 }, { "epoch": 0.01, "learning_rate": 9.99749447645324e-05, "loss": 1.829, "step": 1965 }, { "epoch": 0.01, "learning_rate": 9.997481025131652e-05, "loss": 1.9146, "step": 1970 }, { "epoch": 0.01, "learning_rate": 9.997467537807932e-05, "loss": 1.8755, "step": 1975 }, { "epoch": 0.01, "learning_rate": 9.997454014482175e-05, "loss": 1.8505, "step": 1980 }, { "epoch": 0.01, "learning_rate": 9.997440455154479e-05, "loss": 1.8559, "step": 1985 }, { "epoch": 0.01, "learning_rate": 9.997426859824938e-05, "loss": 1.817, "step": 1990 }, { "epoch": 0.01, "learning_rate": 9.997413228493657e-05, "loss": 1.8241, "step": 1995 }, { "epoch": 0.01, "learning_rate": 9.997399561160729e-05, "loss": 1.933, "step": 2000 }, { "epoch": 0.01, "learning_rate": 9.997385857826254e-05, "loss": 1.8827, "step": 2005 }, { "epoch": 0.01, "learning_rate": 9.997372118490331e-05, "loss": 1.8872, "step": 2010 }, { "epoch": 0.01, "learning_rate": 9.997358343153059e-05, "loss": 1.8316, "step": 2015 }, { "epoch": 0.01, "learning_rate": 9.997344531814534e-05, "loss": 1.9, "step": 2020 }, { "epoch": 0.01, "learning_rate": 9.99733068447486e-05, "loss": 1.8538, "step": 2025 }, { "epoch": 0.01, "learning_rate": 9.997316801134136e-05, "loss": 1.9013, "step": 2030 }, { "epoch": 0.01, "learning_rate": 9.99730288179246e-05, "loss": 1.8587, "step": 2035 }, { "epoch": 0.01, "learning_rate": 9.997288926449933e-05, "loss": 1.8666, "step": 2040 }, { "epoch": 0.01, "learning_rate": 9.997274935106656e-05, "loss": 1.8553, "step": 2045 }, { "epoch": 0.01, "learning_rate": 9.99726090776273e-05, "loss": 1.8691, "step": 2050 }, { "epoch": 0.01, "learning_rate": 9.997246844418256e-05, "loss": 1.8532, "step": 2055 }, { "epoch": 0.01, "learning_rate": 9.997232745073336e-05, "loss": 1.854, "step": 2060 }, { "epoch": 0.01, "learning_rate": 9.997218609728068e-05, "loss": 1.8396, "step": 2065 }, { "epoch": 0.01, "learning_rate": 9.997204438382556e-05, "loss": 1.8831, "step": 2070 }, { "epoch": 0.01, "learning_rate": 9.997190231036906e-05, "loss": 1.9139, "step": 2075 }, { "epoch": 0.01, "learning_rate": 9.997175987691215e-05, "loss": 1.9043, "step": 2080 }, { "epoch": 0.01, "learning_rate": 9.997161708345588e-05, "loss": 1.8698, "step": 2085 }, { "epoch": 0.01, "learning_rate": 9.997147393000126e-05, "loss": 1.8866, "step": 2090 }, { "epoch": 0.01, "learning_rate": 9.997133041654934e-05, "loss": 1.7803, "step": 2095 }, { "epoch": 0.01, "learning_rate": 9.997118654310115e-05, "loss": 1.8311, "step": 2100 }, { "epoch": 0.01, "learning_rate": 9.997104230965775e-05, "loss": 1.8298, "step": 2105 }, { "epoch": 0.01, "learning_rate": 9.997089771622012e-05, "loss": 1.8536, "step": 2110 }, { "epoch": 0.01, "learning_rate": 9.997075276278935e-05, "loss": 1.8554, "step": 2115 }, { "epoch": 0.01, "learning_rate": 9.997060744936646e-05, "loss": 1.8438, "step": 2120 }, { "epoch": 0.01, "learning_rate": 9.997046177595253e-05, "loss": 1.8723, "step": 2125 }, { "epoch": 0.01, "learning_rate": 9.997031574254856e-05, "loss": 1.8877, "step": 2130 }, { "epoch": 0.01, "learning_rate": 9.997016934915564e-05, "loss": 1.8449, "step": 2135 }, { "epoch": 0.01, "learning_rate": 9.997002259577481e-05, "loss": 1.849, "step": 2140 }, { "epoch": 0.01, "learning_rate": 9.996987548240714e-05, "loss": 1.8079, "step": 2145 }, { "epoch": 0.01, "learning_rate": 9.996972800905368e-05, "loss": 1.8599, "step": 2150 }, { "epoch": 0.01, "learning_rate": 9.996958017571547e-05, "loss": 1.9491, "step": 2155 }, { "epoch": 0.01, "learning_rate": 9.996943198239363e-05, "loss": 1.8613, "step": 2160 }, { "epoch": 0.01, "learning_rate": 9.996928342908918e-05, "loss": 1.8701, "step": 2165 }, { "epoch": 0.01, "learning_rate": 9.996913451580319e-05, "loss": 1.8931, "step": 2170 }, { "epoch": 0.01, "learning_rate": 9.996898524253677e-05, "loss": 1.865, "step": 2175 }, { "epoch": 0.01, "learning_rate": 9.996883560929095e-05, "loss": 1.8603, "step": 2180 }, { "epoch": 0.01, "learning_rate": 9.996868561606687e-05, "loss": 1.843, "step": 2185 }, { "epoch": 0.01, "learning_rate": 9.996853526286554e-05, "loss": 1.8347, "step": 2190 }, { "epoch": 0.01, "learning_rate": 9.996838454968807e-05, "loss": 1.865, "step": 2195 }, { "epoch": 0.01, "learning_rate": 9.996823347653557e-05, "loss": 1.8254, "step": 2200 }, { "epoch": 0.01, "learning_rate": 9.996808204340911e-05, "loss": 1.8406, "step": 2205 }, { "epoch": 0.01, "learning_rate": 9.996793025030977e-05, "loss": 1.9007, "step": 2210 }, { "epoch": 0.01, "learning_rate": 9.996777809723865e-05, "loss": 1.8381, "step": 2215 }, { "epoch": 0.01, "learning_rate": 9.996762558419687e-05, "loss": 1.8306, "step": 2220 }, { "epoch": 0.01, "learning_rate": 9.996747271118549e-05, "loss": 1.9283, "step": 2225 }, { "epoch": 0.01, "learning_rate": 9.996731947820564e-05, "loss": 1.8786, "step": 2230 }, { "epoch": 0.01, "learning_rate": 9.996716588525841e-05, "loss": 1.8174, "step": 2235 }, { "epoch": 0.01, "learning_rate": 9.99670119323449e-05, "loss": 1.9248, "step": 2240 }, { "epoch": 0.01, "learning_rate": 9.996685761946625e-05, "loss": 1.8899, "step": 2245 }, { "epoch": 0.01, "learning_rate": 9.996670294662354e-05, "loss": 1.8465, "step": 2250 }, { "epoch": 0.01, "learning_rate": 9.996654791381788e-05, "loss": 1.7607, "step": 2255 }, { "epoch": 0.01, "learning_rate": 9.996639252105042e-05, "loss": 1.9033, "step": 2260 }, { "epoch": 0.01, "learning_rate": 9.996623676832226e-05, "loss": 1.7352, "step": 2265 }, { "epoch": 0.01, "learning_rate": 9.996608065563453e-05, "loss": 1.8316, "step": 2270 }, { "epoch": 0.01, "learning_rate": 9.996592418298834e-05, "loss": 1.8799, "step": 2275 }, { "epoch": 0.01, "learning_rate": 9.996576735038481e-05, "loss": 1.8005, "step": 2280 }, { "epoch": 0.01, "learning_rate": 9.996561015782511e-05, "loss": 1.8363, "step": 2285 }, { "epoch": 0.01, "learning_rate": 9.996545260531034e-05, "loss": 1.8291, "step": 2290 }, { "epoch": 0.01, "learning_rate": 9.996529469284163e-05, "loss": 1.823, "step": 2295 }, { "epoch": 0.01, "learning_rate": 9.996513642042015e-05, "loss": 1.8324, "step": 2300 }, { "epoch": 0.01, "learning_rate": 9.996497778804702e-05, "loss": 1.828, "step": 2305 }, { "epoch": 0.01, "learning_rate": 9.996481879572337e-05, "loss": 1.8444, "step": 2310 }, { "epoch": 0.01, "learning_rate": 9.996465944345036e-05, "loss": 1.8644, "step": 2315 }, { "epoch": 0.01, "learning_rate": 9.996449973122914e-05, "loss": 1.8466, "step": 2320 }, { "epoch": 0.01, "learning_rate": 9.996433965906085e-05, "loss": 1.7987, "step": 2325 }, { "epoch": 0.01, "learning_rate": 9.996417922694666e-05, "loss": 1.8211, "step": 2330 }, { "epoch": 0.01, "learning_rate": 9.996401843488772e-05, "loss": 1.9028, "step": 2335 }, { "epoch": 0.01, "learning_rate": 9.996385728288517e-05, "loss": 1.8029, "step": 2340 }, { "epoch": 0.01, "learning_rate": 9.99636957709402e-05, "loss": 1.8121, "step": 2345 }, { "epoch": 0.01, "learning_rate": 9.996353389905395e-05, "loss": 1.8565, "step": 2350 }, { "epoch": 0.01, "learning_rate": 9.99633716672276e-05, "loss": 1.7883, "step": 2355 }, { "epoch": 0.01, "learning_rate": 9.996320907546232e-05, "loss": 1.8175, "step": 2360 }, { "epoch": 0.01, "learning_rate": 9.996304612375926e-05, "loss": 1.8386, "step": 2365 }, { "epoch": 0.01, "learning_rate": 9.996288281211963e-05, "loss": 1.8528, "step": 2370 }, { "epoch": 0.01, "learning_rate": 9.996271914054457e-05, "loss": 1.8273, "step": 2375 }, { "epoch": 0.01, "learning_rate": 9.996255510903527e-05, "loss": 1.8634, "step": 2380 }, { "epoch": 0.01, "learning_rate": 9.996239071759293e-05, "loss": 1.771, "step": 2385 }, { "epoch": 0.01, "learning_rate": 9.996222596621872e-05, "loss": 1.8285, "step": 2390 }, { "epoch": 0.01, "learning_rate": 9.996206085491382e-05, "loss": 1.8446, "step": 2395 }, { "epoch": 0.01, "learning_rate": 9.996189538367943e-05, "loss": 1.8287, "step": 2400 }, { "epoch": 0.01, "learning_rate": 9.996172955251673e-05, "loss": 1.8809, "step": 2405 }, { "epoch": 0.01, "learning_rate": 9.996156336142694e-05, "loss": 1.8146, "step": 2410 }, { "epoch": 0.01, "learning_rate": 9.996139681041123e-05, "loss": 1.8439, "step": 2415 }, { "epoch": 0.01, "learning_rate": 9.996122989947081e-05, "loss": 1.837, "step": 2420 }, { "epoch": 0.01, "learning_rate": 9.996106262860689e-05, "loss": 1.8751, "step": 2425 }, { "epoch": 0.01, "learning_rate": 9.996089499782066e-05, "loss": 1.7994, "step": 2430 }, { "epoch": 0.01, "learning_rate": 9.996072700711335e-05, "loss": 1.7608, "step": 2435 }, { "epoch": 0.01, "learning_rate": 9.996055865648614e-05, "loss": 1.882, "step": 2440 }, { "epoch": 0.01, "learning_rate": 9.996038994594028e-05, "loss": 1.839, "step": 2445 }, { "epoch": 0.01, "learning_rate": 9.996022087547695e-05, "loss": 1.8721, "step": 2450 }, { "epoch": 0.01, "learning_rate": 9.99600514450974e-05, "loss": 1.8838, "step": 2455 }, { "epoch": 0.01, "learning_rate": 9.995988165480282e-05, "loss": 1.8227, "step": 2460 }, { "epoch": 0.01, "learning_rate": 9.995971150459443e-05, "loss": 1.8654, "step": 2465 }, { "epoch": 0.01, "learning_rate": 9.995954099447349e-05, "loss": 1.8499, "step": 2470 }, { "epoch": 0.01, "learning_rate": 9.99593701244412e-05, "loss": 1.8002, "step": 2475 }, { "epoch": 0.01, "learning_rate": 9.995919889449881e-05, "loss": 1.7824, "step": 2480 }, { "epoch": 0.01, "learning_rate": 9.995902730464754e-05, "loss": 1.8145, "step": 2485 }, { "epoch": 0.01, "learning_rate": 9.995885535488862e-05, "loss": 1.7825, "step": 2490 }, { "epoch": 0.01, "learning_rate": 9.995868304522332e-05, "loss": 1.828, "step": 2495 }, { "epoch": 0.01, "learning_rate": 9.995851037565285e-05, "loss": 1.8029, "step": 2500 }, { "epoch": 0.01, "learning_rate": 9.995833734617848e-05, "loss": 1.8015, "step": 2505 }, { "epoch": 0.01, "learning_rate": 9.995816395680142e-05, "loss": 1.8612, "step": 2510 }, { "epoch": 0.01, "learning_rate": 9.995799020752296e-05, "loss": 1.8807, "step": 2515 }, { "epoch": 0.01, "learning_rate": 9.995781609834432e-05, "loss": 1.8283, "step": 2520 }, { "epoch": 0.01, "learning_rate": 9.995764162926677e-05, "loss": 1.8188, "step": 2525 }, { "epoch": 0.01, "learning_rate": 9.995746680029157e-05, "loss": 1.8505, "step": 2530 }, { "epoch": 0.01, "learning_rate": 9.995729161141995e-05, "loss": 1.8024, "step": 2535 }, { "epoch": 0.01, "learning_rate": 9.995711606265321e-05, "loss": 1.8767, "step": 2540 }, { "epoch": 0.01, "learning_rate": 9.995694015399261e-05, "loss": 1.8533, "step": 2545 }, { "epoch": 0.01, "learning_rate": 9.99567638854394e-05, "loss": 1.8694, "step": 2550 }, { "epoch": 0.01, "learning_rate": 9.995658725699485e-05, "loss": 1.8472, "step": 2555 }, { "epoch": 0.01, "learning_rate": 9.995641026866025e-05, "loss": 1.7825, "step": 2560 }, { "epoch": 0.01, "learning_rate": 9.995623292043686e-05, "loss": 1.8257, "step": 2565 }, { "epoch": 0.01, "learning_rate": 9.995605521232595e-05, "loss": 1.7669, "step": 2570 }, { "epoch": 0.01, "learning_rate": 9.995587714432881e-05, "loss": 1.9023, "step": 2575 }, { "epoch": 0.01, "learning_rate": 9.995569871644675e-05, "loss": 1.8592, "step": 2580 }, { "epoch": 0.01, "learning_rate": 9.995551992868102e-05, "loss": 1.8062, "step": 2585 }, { "epoch": 0.01, "learning_rate": 9.99553407810329e-05, "loss": 1.8222, "step": 2590 }, { "epoch": 0.01, "learning_rate": 9.995516127350372e-05, "loss": 1.8272, "step": 2595 }, { "epoch": 0.01, "learning_rate": 9.995498140609475e-05, "loss": 1.7743, "step": 2600 }, { "epoch": 0.01, "learning_rate": 9.995480117880728e-05, "loss": 1.7396, "step": 2605 }, { "epoch": 0.01, "learning_rate": 9.995462059164262e-05, "loss": 1.8939, "step": 2610 }, { "epoch": 0.01, "learning_rate": 9.995443964460207e-05, "loss": 1.8199, "step": 2615 }, { "epoch": 0.01, "learning_rate": 9.995425833768692e-05, "loss": 1.7958, "step": 2620 }, { "epoch": 0.01, "learning_rate": 9.99540766708985e-05, "loss": 1.8013, "step": 2625 }, { "epoch": 0.01, "learning_rate": 9.99538946442381e-05, "loss": 1.8203, "step": 2630 }, { "epoch": 0.01, "learning_rate": 9.995371225770705e-05, "loss": 1.8239, "step": 2635 }, { "epoch": 0.01, "learning_rate": 9.995352951130664e-05, "loss": 1.8476, "step": 2640 }, { "epoch": 0.01, "learning_rate": 9.995334640503817e-05, "loss": 1.8316, "step": 2645 }, { "epoch": 0.01, "learning_rate": 9.995316293890302e-05, "loss": 1.8387, "step": 2650 }, { "epoch": 0.01, "learning_rate": 9.995297911290247e-05, "loss": 1.8352, "step": 2655 }, { "epoch": 0.01, "learning_rate": 9.995279492703786e-05, "loss": 1.8389, "step": 2660 }, { "epoch": 0.01, "learning_rate": 9.99526103813105e-05, "loss": 1.7688, "step": 2665 }, { "epoch": 0.01, "learning_rate": 9.995242547572172e-05, "loss": 1.8714, "step": 2670 }, { "epoch": 0.01, "learning_rate": 9.995224021027288e-05, "loss": 1.8015, "step": 2675 }, { "epoch": 0.01, "learning_rate": 9.995205458496529e-05, "loss": 1.8451, "step": 2680 }, { "epoch": 0.01, "learning_rate": 9.995186859980029e-05, "loss": 1.8016, "step": 2685 }, { "epoch": 0.01, "learning_rate": 9.995168225477922e-05, "loss": 1.8983, "step": 2690 }, { "epoch": 0.01, "learning_rate": 9.995149554990343e-05, "loss": 1.8027, "step": 2695 }, { "epoch": 0.01, "learning_rate": 9.995130848517426e-05, "loss": 1.8233, "step": 2700 }, { "epoch": 0.01, "learning_rate": 9.995112106059306e-05, "loss": 1.8725, "step": 2705 }, { "epoch": 0.01, "learning_rate": 9.995093327616116e-05, "loss": 1.8273, "step": 2710 }, { "epoch": 0.01, "learning_rate": 9.995074513187995e-05, "loss": 1.8224, "step": 2715 }, { "epoch": 0.01, "learning_rate": 9.995055662775076e-05, "loss": 1.8049, "step": 2720 }, { "epoch": 0.01, "learning_rate": 9.995036776377495e-05, "loss": 1.7678, "step": 2725 }, { "epoch": 0.01, "learning_rate": 9.99501785399539e-05, "loss": 1.8001, "step": 2730 }, { "epoch": 0.01, "learning_rate": 9.994998895628895e-05, "loss": 1.8751, "step": 2735 }, { "epoch": 0.01, "learning_rate": 9.994979901278146e-05, "loss": 1.8808, "step": 2740 }, { "epoch": 0.01, "learning_rate": 9.994960870943282e-05, "loss": 1.7868, "step": 2745 }, { "epoch": 0.01, "learning_rate": 9.99494180462444e-05, "loss": 1.88, "step": 2750 }, { "epoch": 0.01, "learning_rate": 9.994922702321757e-05, "loss": 1.819, "step": 2755 }, { "epoch": 0.01, "learning_rate": 9.99490356403537e-05, "loss": 1.8448, "step": 2760 }, { "epoch": 0.01, "learning_rate": 9.994884389765415e-05, "loss": 1.8158, "step": 2765 }, { "epoch": 0.01, "learning_rate": 9.994865179512034e-05, "loss": 1.8567, "step": 2770 }, { "epoch": 0.01, "learning_rate": 9.994845933275365e-05, "loss": 1.8836, "step": 2775 }, { "epoch": 0.02, "learning_rate": 9.994826651055543e-05, "loss": 1.8614, "step": 2780 }, { "epoch": 0.02, "learning_rate": 9.994807332852711e-05, "loss": 1.9092, "step": 2785 }, { "epoch": 0.02, "learning_rate": 9.994787978667005e-05, "loss": 1.7818, "step": 2790 }, { "epoch": 0.02, "learning_rate": 9.994768588498566e-05, "loss": 1.8971, "step": 2795 }, { "epoch": 0.02, "learning_rate": 9.994749162347534e-05, "loss": 1.7918, "step": 2800 }, { "epoch": 0.02, "learning_rate": 9.994729700214049e-05, "loss": 1.8786, "step": 2805 }, { "epoch": 0.02, "learning_rate": 9.994710202098249e-05, "loss": 1.9258, "step": 2810 }, { "epoch": 0.02, "learning_rate": 9.99469066800028e-05, "loss": 1.9057, "step": 2815 }, { "epoch": 0.02, "learning_rate": 9.994671097920274e-05, "loss": 1.7884, "step": 2820 }, { "epoch": 0.02, "learning_rate": 9.99465149185838e-05, "loss": 1.8208, "step": 2825 }, { "epoch": 0.02, "learning_rate": 9.994631849814735e-05, "loss": 1.8805, "step": 2830 }, { "epoch": 0.02, "learning_rate": 9.994612171789482e-05, "loss": 1.8603, "step": 2835 }, { "epoch": 0.02, "learning_rate": 9.994592457782761e-05, "loss": 1.8498, "step": 2840 }, { "epoch": 0.02, "learning_rate": 9.994572707794716e-05, "loss": 1.8123, "step": 2845 }, { "epoch": 0.02, "learning_rate": 9.994552921825489e-05, "loss": 1.8469, "step": 2850 }, { "epoch": 0.02, "learning_rate": 9.994533099875223e-05, "loss": 1.7835, "step": 2855 }, { "epoch": 0.02, "learning_rate": 9.994513241944059e-05, "loss": 1.8317, "step": 2860 }, { "epoch": 0.02, "learning_rate": 9.99449334803214e-05, "loss": 1.8858, "step": 2865 }, { "epoch": 0.02, "learning_rate": 9.994473418139614e-05, "loss": 1.8369, "step": 2870 }, { "epoch": 0.02, "learning_rate": 9.994453452266618e-05, "loss": 1.777, "step": 2875 }, { "epoch": 0.02, "learning_rate": 9.994433450413299e-05, "loss": 1.7554, "step": 2880 }, { "epoch": 0.02, "learning_rate": 9.994413412579802e-05, "loss": 1.8802, "step": 2885 }, { "epoch": 0.02, "learning_rate": 9.994393338766268e-05, "loss": 1.8327, "step": 2890 }, { "epoch": 0.02, "learning_rate": 9.994373228972846e-05, "loss": 1.808, "step": 2895 }, { "epoch": 0.02, "learning_rate": 9.994353083199678e-05, "loss": 1.8255, "step": 2900 }, { "epoch": 0.02, "learning_rate": 9.99433290144691e-05, "loss": 1.7777, "step": 2905 }, { "epoch": 0.02, "learning_rate": 9.994312683714687e-05, "loss": 1.8217, "step": 2910 }, { "epoch": 0.02, "learning_rate": 9.994292430003155e-05, "loss": 1.8371, "step": 2915 }, { "epoch": 0.02, "learning_rate": 9.99427214031246e-05, "loss": 1.8696, "step": 2920 }, { "epoch": 0.02, "learning_rate": 9.994251814642748e-05, "loss": 1.7111, "step": 2925 }, { "epoch": 0.02, "learning_rate": 9.994231452994166e-05, "loss": 1.9046, "step": 2930 }, { "epoch": 0.02, "learning_rate": 9.994211055366858e-05, "loss": 1.8683, "step": 2935 }, { "epoch": 0.02, "learning_rate": 9.994190621760974e-05, "loss": 1.795, "step": 2940 }, { "epoch": 0.02, "learning_rate": 9.99417015217666e-05, "loss": 1.7939, "step": 2945 }, { "epoch": 0.02, "learning_rate": 9.994149646614064e-05, "loss": 1.7851, "step": 2950 }, { "epoch": 0.02, "learning_rate": 9.994129105073333e-05, "loss": 1.8464, "step": 2955 }, { "epoch": 0.02, "learning_rate": 9.994108527554617e-05, "loss": 1.8609, "step": 2960 }, { "epoch": 0.02, "learning_rate": 9.994087914058061e-05, "loss": 1.8137, "step": 2965 }, { "epoch": 0.02, "learning_rate": 9.994067264583815e-05, "loss": 1.8702, "step": 2970 }, { "epoch": 0.02, "learning_rate": 9.994046579132029e-05, "loss": 1.8041, "step": 2975 }, { "epoch": 0.02, "learning_rate": 9.994025857702852e-05, "loss": 1.8061, "step": 2980 }, { "epoch": 0.02, "learning_rate": 9.994005100296428e-05, "loss": 1.8552, "step": 2985 }, { "epoch": 0.02, "learning_rate": 9.993984306912915e-05, "loss": 1.8787, "step": 2990 }, { "epoch": 0.02, "learning_rate": 9.993963477552458e-05, "loss": 1.8391, "step": 2995 }, { "epoch": 0.02, "learning_rate": 9.993942612215206e-05, "loss": 1.7974, "step": 3000 }, { "epoch": 0.02, "learning_rate": 9.99392171090131e-05, "loss": 1.814, "step": 3005 }, { "epoch": 0.02, "learning_rate": 9.993900773610923e-05, "loss": 1.8037, "step": 3010 }, { "epoch": 0.02, "learning_rate": 9.993879800344195e-05, "loss": 1.7985, "step": 3015 }, { "epoch": 0.02, "learning_rate": 9.993858791101275e-05, "loss": 1.8919, "step": 3020 }, { "epoch": 0.02, "learning_rate": 9.993837745882317e-05, "loss": 1.7859, "step": 3025 }, { "epoch": 0.02, "learning_rate": 9.99381666468747e-05, "loss": 1.8413, "step": 3030 }, { "epoch": 0.02, "learning_rate": 9.993795547516889e-05, "loss": 1.8596, "step": 3035 }, { "epoch": 0.02, "learning_rate": 9.993774394370723e-05, "loss": 1.8184, "step": 3040 }, { "epoch": 0.02, "learning_rate": 9.993753205249126e-05, "loss": 1.7863, "step": 3045 }, { "epoch": 0.02, "learning_rate": 9.99373198015225e-05, "loss": 1.7669, "step": 3050 }, { "epoch": 0.02, "learning_rate": 9.993710719080249e-05, "loss": 1.8672, "step": 3055 }, { "epoch": 0.02, "learning_rate": 9.993689422033275e-05, "loss": 1.8124, "step": 3060 }, { "epoch": 0.02, "learning_rate": 9.993668089011481e-05, "loss": 1.8182, "step": 3065 }, { "epoch": 0.02, "learning_rate": 9.993646720015024e-05, "loss": 1.8194, "step": 3070 }, { "epoch": 0.02, "learning_rate": 9.993625315044052e-05, "loss": 1.8445, "step": 3075 }, { "epoch": 0.02, "learning_rate": 9.993603874098727e-05, "loss": 1.8719, "step": 3080 }, { "epoch": 0.02, "learning_rate": 9.993582397179196e-05, "loss": 1.856, "step": 3085 }, { "epoch": 0.02, "learning_rate": 9.993560884285619e-05, "loss": 1.7792, "step": 3090 }, { "epoch": 0.02, "learning_rate": 9.993539335418147e-05, "loss": 1.8641, "step": 3095 }, { "epoch": 0.02, "learning_rate": 9.993517750576939e-05, "loss": 1.7992, "step": 3100 }, { "epoch": 0.02, "learning_rate": 9.993496129762148e-05, "loss": 1.7984, "step": 3105 }, { "epoch": 0.02, "learning_rate": 9.99347447297393e-05, "loss": 1.7618, "step": 3110 }, { "epoch": 0.02, "learning_rate": 9.99345278021244e-05, "loss": 1.8138, "step": 3115 }, { "epoch": 0.02, "learning_rate": 9.993431051477837e-05, "loss": 1.7794, "step": 3120 }, { "epoch": 0.02, "learning_rate": 9.993409286770277e-05, "loss": 1.8591, "step": 3125 }, { "epoch": 0.02, "learning_rate": 9.993387486089916e-05, "loss": 1.8151, "step": 3130 }, { "epoch": 0.02, "learning_rate": 9.99336564943691e-05, "loss": 1.8608, "step": 3135 }, { "epoch": 0.02, "learning_rate": 9.993343776811416e-05, "loss": 1.8061, "step": 3140 }, { "epoch": 0.02, "learning_rate": 9.993321868213596e-05, "loss": 1.7918, "step": 3145 }, { "epoch": 0.02, "learning_rate": 9.993299923643604e-05, "loss": 1.8684, "step": 3150 }, { "epoch": 0.02, "learning_rate": 9.993277943101598e-05, "loss": 1.7768, "step": 3155 }, { "epoch": 0.02, "learning_rate": 9.993255926587737e-05, "loss": 1.8067, "step": 3160 }, { "epoch": 0.02, "learning_rate": 9.993233874102181e-05, "loss": 1.8216, "step": 3165 }, { "epoch": 0.02, "learning_rate": 9.993211785645087e-05, "loss": 1.8348, "step": 3170 }, { "epoch": 0.02, "learning_rate": 9.993189661216615e-05, "loss": 1.7889, "step": 3175 }, { "epoch": 0.02, "learning_rate": 9.993167500816924e-05, "loss": 1.8317, "step": 3180 }, { "epoch": 0.02, "learning_rate": 9.993145304446174e-05, "loss": 1.8172, "step": 3185 }, { "epoch": 0.02, "learning_rate": 9.993123072104524e-05, "loss": 1.8289, "step": 3190 }, { "epoch": 0.02, "learning_rate": 9.993100803792136e-05, "loss": 1.7757, "step": 3195 }, { "epoch": 0.02, "learning_rate": 9.993078499509169e-05, "loss": 1.829, "step": 3200 }, { "epoch": 0.02, "learning_rate": 9.993056159255783e-05, "loss": 1.8348, "step": 3205 }, { "epoch": 0.02, "learning_rate": 9.993033783032142e-05, "loss": 1.8599, "step": 3210 }, { "epoch": 0.02, "learning_rate": 9.993011370838403e-05, "loss": 1.8848, "step": 3215 }, { "epoch": 0.02, "learning_rate": 9.992988922674731e-05, "loss": 1.8039, "step": 3220 }, { "epoch": 0.02, "learning_rate": 9.992966438541285e-05, "loss": 1.8022, "step": 3225 }, { "epoch": 0.02, "learning_rate": 9.99294391843823e-05, "loss": 1.8034, "step": 3230 }, { "epoch": 0.02, "learning_rate": 9.992921362365725e-05, "loss": 1.8238, "step": 3235 }, { "epoch": 0.02, "learning_rate": 9.992898770323936e-05, "loss": 1.8279, "step": 3240 }, { "epoch": 0.02, "learning_rate": 9.992876142313022e-05, "loss": 1.9189, "step": 3245 }, { "epoch": 0.02, "learning_rate": 9.992853478333148e-05, "loss": 1.8503, "step": 3250 }, { "epoch": 0.02, "learning_rate": 9.99283077838448e-05, "loss": 1.8122, "step": 3255 }, { "epoch": 0.02, "learning_rate": 9.992808042467175e-05, "loss": 1.8223, "step": 3260 }, { "epoch": 0.02, "learning_rate": 9.992785270581402e-05, "loss": 1.8419, "step": 3265 }, { "epoch": 0.02, "learning_rate": 9.992762462727322e-05, "loss": 1.7694, "step": 3270 }, { "epoch": 0.02, "learning_rate": 9.992739618905103e-05, "loss": 1.8581, "step": 3275 }, { "epoch": 0.02, "learning_rate": 9.992716739114904e-05, "loss": 1.7993, "step": 3280 }, { "epoch": 0.02, "learning_rate": 9.992693823356895e-05, "loss": 1.7929, "step": 3285 }, { "epoch": 0.02, "learning_rate": 9.99267087163124e-05, "loss": 1.7887, "step": 3290 }, { "epoch": 0.02, "learning_rate": 9.992647883938105e-05, "loss": 1.8725, "step": 3295 }, { "epoch": 0.02, "learning_rate": 9.992624860277651e-05, "loss": 1.8625, "step": 3300 }, { "epoch": 0.02, "learning_rate": 9.992601800650047e-05, "loss": 1.803, "step": 3305 }, { "epoch": 0.02, "learning_rate": 9.992578705055462e-05, "loss": 1.7543, "step": 3310 }, { "epoch": 0.02, "learning_rate": 9.992555573494059e-05, "loss": 1.818, "step": 3315 }, { "epoch": 0.02, "learning_rate": 9.992532405966004e-05, "loss": 1.8298, "step": 3320 }, { "epoch": 0.02, "learning_rate": 9.992509202471465e-05, "loss": 1.8706, "step": 3325 }, { "epoch": 0.02, "learning_rate": 9.99248596301061e-05, "loss": 1.7972, "step": 3330 }, { "epoch": 0.02, "learning_rate": 9.992462687583603e-05, "loss": 1.8423, "step": 3335 }, { "epoch": 0.02, "learning_rate": 9.992439376190619e-05, "loss": 1.8022, "step": 3340 }, { "epoch": 0.02, "learning_rate": 9.992416028831818e-05, "loss": 1.8161, "step": 3345 }, { "epoch": 0.02, "learning_rate": 9.992392645507372e-05, "loss": 1.8187, "step": 3350 }, { "epoch": 0.02, "learning_rate": 9.992369226217449e-05, "loss": 1.7939, "step": 3355 }, { "epoch": 0.02, "learning_rate": 9.99234577096222e-05, "loss": 1.817, "step": 3360 }, { "epoch": 0.02, "learning_rate": 9.992322279741848e-05, "loss": 1.8354, "step": 3365 }, { "epoch": 0.02, "learning_rate": 9.992298752556507e-05, "loss": 1.758, "step": 3370 }, { "epoch": 0.02, "learning_rate": 9.992275189406368e-05, "loss": 1.8542, "step": 3375 }, { "epoch": 0.02, "learning_rate": 9.992251590291595e-05, "loss": 1.8526, "step": 3380 }, { "epoch": 0.02, "learning_rate": 9.992227955212362e-05, "loss": 1.8337, "step": 3385 }, { "epoch": 0.02, "learning_rate": 9.992204284168838e-05, "loss": 1.7055, "step": 3390 }, { "epoch": 0.02, "learning_rate": 9.992180577161194e-05, "loss": 1.7715, "step": 3395 }, { "epoch": 0.02, "learning_rate": 9.992156834189603e-05, "loss": 1.8512, "step": 3400 }, { "epoch": 0.02, "learning_rate": 9.992133055254231e-05, "loss": 1.829, "step": 3405 }, { "epoch": 0.02, "learning_rate": 9.992109240355254e-05, "loss": 1.8308, "step": 3410 }, { "epoch": 0.02, "learning_rate": 9.99208538949284e-05, "loss": 1.9321, "step": 3415 }, { "epoch": 0.02, "learning_rate": 9.992061502667163e-05, "loss": 1.8753, "step": 3420 }, { "epoch": 0.02, "learning_rate": 9.992037579878394e-05, "loss": 1.8353, "step": 3425 }, { "epoch": 0.02, "learning_rate": 9.992013621126706e-05, "loss": 1.8749, "step": 3430 }, { "epoch": 0.02, "learning_rate": 9.991989626412272e-05, "loss": 1.8623, "step": 3435 }, { "epoch": 0.02, "learning_rate": 9.991965595735265e-05, "loss": 1.7777, "step": 3440 }, { "epoch": 0.02, "learning_rate": 9.991941529095857e-05, "loss": 1.8139, "step": 3445 }, { "epoch": 0.02, "learning_rate": 9.991917426494219e-05, "loss": 1.7999, "step": 3450 }, { "epoch": 0.02, "learning_rate": 9.99189328793053e-05, "loss": 1.8348, "step": 3455 }, { "epoch": 0.02, "learning_rate": 9.991869113404961e-05, "loss": 1.8058, "step": 3460 }, { "epoch": 0.02, "learning_rate": 9.991844902917688e-05, "loss": 1.8326, "step": 3465 }, { "epoch": 0.02, "learning_rate": 9.991820656468882e-05, "loss": 1.7735, "step": 3470 }, { "epoch": 0.02, "learning_rate": 9.99179637405872e-05, "loss": 1.8276, "step": 3475 }, { "epoch": 0.02, "learning_rate": 9.991772055687376e-05, "loss": 1.82, "step": 3480 }, { "epoch": 0.02, "learning_rate": 9.991747701355028e-05, "loss": 1.8212, "step": 3485 }, { "epoch": 0.02, "learning_rate": 9.991723311061847e-05, "loss": 1.7824, "step": 3490 }, { "epoch": 0.02, "learning_rate": 9.991698884808012e-05, "loss": 1.8395, "step": 3495 }, { "epoch": 0.02, "learning_rate": 9.991674422593696e-05, "loss": 1.9103, "step": 3500 }, { "epoch": 0.02, "learning_rate": 9.991649924419078e-05, "loss": 1.8473, "step": 3505 }, { "epoch": 0.02, "learning_rate": 9.991625390284334e-05, "loss": 1.8502, "step": 3510 }, { "epoch": 0.02, "learning_rate": 9.991600820189641e-05, "loss": 1.8387, "step": 3515 }, { "epoch": 0.02, "learning_rate": 9.991576214135175e-05, "loss": 1.794, "step": 3520 }, { "epoch": 0.02, "learning_rate": 9.991551572121112e-05, "loss": 1.7728, "step": 3525 }, { "epoch": 0.02, "learning_rate": 9.991526894147632e-05, "loss": 1.7518, "step": 3530 }, { "epoch": 0.02, "learning_rate": 9.991502180214912e-05, "loss": 1.7969, "step": 3535 }, { "epoch": 0.02, "learning_rate": 9.99147743032313e-05, "loss": 1.7915, "step": 3540 }, { "epoch": 0.02, "learning_rate": 9.991452644472464e-05, "loss": 1.8099, "step": 3545 }, { "epoch": 0.02, "learning_rate": 9.991427822663094e-05, "loss": 1.8337, "step": 3550 }, { "epoch": 0.02, "learning_rate": 9.991402964895196e-05, "loss": 1.8004, "step": 3555 }, { "epoch": 0.02, "learning_rate": 9.991378071168951e-05, "loss": 1.8021, "step": 3560 }, { "epoch": 0.02, "learning_rate": 9.991353141484537e-05, "loss": 1.7831, "step": 3565 }, { "epoch": 0.02, "learning_rate": 9.991328175842137e-05, "loss": 1.73, "step": 3570 }, { "epoch": 0.02, "learning_rate": 9.991303174241926e-05, "loss": 1.8185, "step": 3575 }, { "epoch": 0.02, "learning_rate": 9.991278136684087e-05, "loss": 1.8491, "step": 3580 }, { "epoch": 0.02, "learning_rate": 9.991253063168802e-05, "loss": 1.8421, "step": 3585 }, { "epoch": 0.02, "learning_rate": 9.991227953696247e-05, "loss": 1.8274, "step": 3590 }, { "epoch": 0.02, "learning_rate": 9.991202808266606e-05, "loss": 1.801, "step": 3595 }, { "epoch": 0.02, "learning_rate": 9.991177626880062e-05, "loss": 1.847, "step": 3600 }, { "epoch": 0.02, "learning_rate": 9.991152409536792e-05, "loss": 1.8734, "step": 3605 }, { "epoch": 0.02, "learning_rate": 9.991127156236979e-05, "loss": 1.8325, "step": 3610 }, { "epoch": 0.02, "learning_rate": 9.991101866980808e-05, "loss": 1.8362, "step": 3615 }, { "epoch": 0.02, "learning_rate": 9.991076541768455e-05, "loss": 1.8317, "step": 3620 }, { "epoch": 0.02, "learning_rate": 9.99105118060011e-05, "loss": 1.8535, "step": 3625 }, { "epoch": 0.02, "learning_rate": 9.991025783475951e-05, "loss": 1.8153, "step": 3630 }, { "epoch": 0.02, "learning_rate": 9.99100035039616e-05, "loss": 1.8389, "step": 3635 }, { "epoch": 0.02, "learning_rate": 9.990974881360925e-05, "loss": 1.8329, "step": 3640 }, { "epoch": 0.02, "learning_rate": 9.990949376370425e-05, "loss": 1.8342, "step": 3645 }, { "epoch": 0.02, "learning_rate": 9.990923835424844e-05, "loss": 1.8157, "step": 3650 }, { "epoch": 0.02, "learning_rate": 9.99089825852437e-05, "loss": 1.8181, "step": 3655 }, { "epoch": 0.02, "learning_rate": 9.990872645669183e-05, "loss": 1.809, "step": 3660 }, { "epoch": 0.02, "learning_rate": 9.990846996859469e-05, "loss": 1.7896, "step": 3665 }, { "epoch": 0.02, "learning_rate": 9.990821312095413e-05, "loss": 1.8391, "step": 3670 }, { "epoch": 0.02, "learning_rate": 9.9907955913772e-05, "loss": 1.8258, "step": 3675 }, { "epoch": 0.02, "learning_rate": 9.990769834705015e-05, "loss": 1.8594, "step": 3680 }, { "epoch": 0.02, "learning_rate": 9.990744042079046e-05, "loss": 1.7981, "step": 3685 }, { "epoch": 0.02, "learning_rate": 9.990718213499474e-05, "loss": 1.7522, "step": 3690 }, { "epoch": 0.02, "learning_rate": 9.990692348966488e-05, "loss": 1.7898, "step": 3695 }, { "epoch": 0.02, "learning_rate": 9.990666448480274e-05, "loss": 1.8227, "step": 3700 }, { "epoch": 0.02, "learning_rate": 9.99064051204102e-05, "loss": 1.8397, "step": 3705 }, { "epoch": 0.02, "learning_rate": 9.990614539648911e-05, "loss": 1.8238, "step": 3710 }, { "epoch": 0.02, "learning_rate": 9.990588531304133e-05, "loss": 1.8289, "step": 3715 }, { "epoch": 0.02, "learning_rate": 9.990562487006876e-05, "loss": 1.8229, "step": 3720 }, { "epoch": 0.02, "learning_rate": 9.990536406757326e-05, "loss": 1.8019, "step": 3725 }, { "epoch": 0.02, "learning_rate": 9.990510290555673e-05, "loss": 1.8074, "step": 3730 }, { "epoch": 0.02, "learning_rate": 9.9904841384021e-05, "loss": 1.8293, "step": 3735 }, { "epoch": 0.02, "learning_rate": 9.990457950296802e-05, "loss": 1.8099, "step": 3740 }, { "epoch": 0.02, "learning_rate": 9.990431726239964e-05, "loss": 1.8211, "step": 3745 }, { "epoch": 0.02, "learning_rate": 9.990405466231776e-05, "loss": 1.774, "step": 3750 }, { "epoch": 0.02, "learning_rate": 9.990379170272426e-05, "loss": 1.901, "step": 3755 }, { "epoch": 0.02, "learning_rate": 9.990352838362103e-05, "loss": 1.8491, "step": 3760 }, { "epoch": 0.02, "learning_rate": 9.990326470501e-05, "loss": 1.8756, "step": 3765 }, { "epoch": 0.02, "learning_rate": 9.990300066689302e-05, "loss": 1.7817, "step": 3770 }, { "epoch": 0.02, "learning_rate": 9.990273626927205e-05, "loss": 1.8911, "step": 3775 }, { "epoch": 0.02, "learning_rate": 9.990247151214895e-05, "loss": 1.827, "step": 3780 }, { "epoch": 0.02, "learning_rate": 9.990220639552565e-05, "loss": 1.826, "step": 3785 }, { "epoch": 0.02, "learning_rate": 9.990194091940404e-05, "loss": 1.8323, "step": 3790 }, { "epoch": 0.02, "learning_rate": 9.990167508378605e-05, "loss": 1.7743, "step": 3795 }, { "epoch": 0.02, "learning_rate": 9.990140888867359e-05, "loss": 1.8396, "step": 3800 }, { "epoch": 0.02, "learning_rate": 9.990114233406858e-05, "loss": 1.8606, "step": 3805 }, { "epoch": 0.02, "learning_rate": 9.990087541997292e-05, "loss": 1.8748, "step": 3810 }, { "epoch": 0.02, "learning_rate": 9.990060814638857e-05, "loss": 1.7799, "step": 3815 }, { "epoch": 0.02, "learning_rate": 9.990034051331744e-05, "loss": 1.8561, "step": 3820 }, { "epoch": 0.02, "learning_rate": 9.990007252076144e-05, "loss": 1.7316, "step": 3825 }, { "epoch": 0.02, "learning_rate": 9.989980416872253e-05, "loss": 1.816, "step": 3830 }, { "epoch": 0.02, "learning_rate": 9.989953545720261e-05, "loss": 1.8062, "step": 3835 }, { "epoch": 0.02, "learning_rate": 9.989926638620364e-05, "loss": 1.7991, "step": 3840 }, { "epoch": 0.02, "learning_rate": 9.989899695572755e-05, "loss": 1.8264, "step": 3845 }, { "epoch": 0.02, "learning_rate": 9.989872716577628e-05, "loss": 1.7549, "step": 3850 }, { "epoch": 0.02, "learning_rate": 9.989845701635179e-05, "loss": 1.8085, "step": 3855 }, { "epoch": 0.02, "learning_rate": 9.9898186507456e-05, "loss": 1.815, "step": 3860 }, { "epoch": 0.02, "learning_rate": 9.989791563909088e-05, "loss": 1.8673, "step": 3865 }, { "epoch": 0.02, "learning_rate": 9.989764441125837e-05, "loss": 1.7152, "step": 3870 }, { "epoch": 0.02, "learning_rate": 9.989737282396043e-05, "loss": 1.8228, "step": 3875 }, { "epoch": 0.02, "learning_rate": 9.989710087719902e-05, "loss": 1.81, "step": 3880 }, { "epoch": 0.02, "learning_rate": 9.989682857097607e-05, "loss": 1.8581, "step": 3885 }, { "epoch": 0.02, "learning_rate": 9.989655590529358e-05, "loss": 1.8139, "step": 3890 }, { "epoch": 0.02, "learning_rate": 9.989628288015349e-05, "loss": 1.79, "step": 3895 }, { "epoch": 0.02, "learning_rate": 9.989600949555778e-05, "loss": 1.8391, "step": 3900 }, { "epoch": 0.02, "learning_rate": 9.989573575150842e-05, "loss": 1.8155, "step": 3905 }, { "epoch": 0.02, "learning_rate": 9.989546164800736e-05, "loss": 1.8651, "step": 3910 }, { "epoch": 0.02, "learning_rate": 9.989518718505662e-05, "loss": 1.7705, "step": 3915 }, { "epoch": 0.02, "learning_rate": 9.989491236265812e-05, "loss": 1.808, "step": 3920 }, { "epoch": 0.02, "learning_rate": 9.989463718081389e-05, "loss": 1.863, "step": 3925 }, { "epoch": 0.02, "learning_rate": 9.989436163952588e-05, "loss": 1.8432, "step": 3930 }, { "epoch": 0.02, "learning_rate": 9.989408573879608e-05, "loss": 1.7729, "step": 3935 }, { "epoch": 0.02, "learning_rate": 9.98938094786265e-05, "loss": 1.8131, "step": 3940 }, { "epoch": 0.02, "learning_rate": 9.98935328590191e-05, "loss": 1.8489, "step": 3945 }, { "epoch": 0.02, "learning_rate": 9.989325587997589e-05, "loss": 1.8447, "step": 3950 }, { "epoch": 0.02, "learning_rate": 9.989297854149886e-05, "loss": 1.8243, "step": 3955 }, { "epoch": 0.02, "learning_rate": 9.989270084359002e-05, "loss": 1.7938, "step": 3960 }, { "epoch": 0.02, "learning_rate": 9.989242278625135e-05, "loss": 1.8198, "step": 3965 }, { "epoch": 0.02, "learning_rate": 9.989214436948486e-05, "loss": 1.7969, "step": 3970 }, { "epoch": 0.02, "learning_rate": 9.989186559329258e-05, "loss": 1.8133, "step": 3975 }, { "epoch": 0.02, "learning_rate": 9.989158645767648e-05, "loss": 1.7863, "step": 3980 }, { "epoch": 0.02, "learning_rate": 9.98913069626386e-05, "loss": 1.8381, "step": 3985 }, { "epoch": 0.02, "learning_rate": 9.989102710818092e-05, "loss": 1.7817, "step": 3990 }, { "epoch": 0.02, "learning_rate": 9.98907468943055e-05, "loss": 1.8591, "step": 3995 }, { "epoch": 0.02, "learning_rate": 9.989046632101434e-05, "loss": 1.8341, "step": 4000 }, { "epoch": 0.02, "learning_rate": 9.989018538830943e-05, "loss": 1.8408, "step": 4005 }, { "epoch": 0.02, "learning_rate": 9.988990409619285e-05, "loss": 1.8036, "step": 4010 }, { "epoch": 0.02, "learning_rate": 9.988962244466659e-05, "loss": 1.7921, "step": 4015 }, { "epoch": 0.02, "learning_rate": 9.988934043373268e-05, "loss": 1.8006, "step": 4020 }, { "epoch": 0.02, "learning_rate": 9.988905806339317e-05, "loss": 1.7879, "step": 4025 }, { "epoch": 0.02, "learning_rate": 9.988877533365008e-05, "loss": 1.7896, "step": 4030 }, { "epoch": 0.02, "learning_rate": 9.988849224450545e-05, "loss": 1.7246, "step": 4035 }, { "epoch": 0.02, "learning_rate": 9.988820879596132e-05, "loss": 1.7943, "step": 4040 }, { "epoch": 0.02, "learning_rate": 9.988792498801973e-05, "loss": 1.8805, "step": 4045 }, { "epoch": 0.02, "learning_rate": 9.988764082068272e-05, "loss": 1.7867, "step": 4050 }, { "epoch": 0.02, "learning_rate": 9.988735629395236e-05, "loss": 1.8479, "step": 4055 }, { "epoch": 0.02, "learning_rate": 9.988707140783067e-05, "loss": 1.7571, "step": 4060 }, { "epoch": 0.02, "learning_rate": 9.988678616231972e-05, "loss": 1.8953, "step": 4065 }, { "epoch": 0.02, "learning_rate": 9.988650055742155e-05, "loss": 1.8658, "step": 4070 }, { "epoch": 0.02, "learning_rate": 9.988621459313823e-05, "loss": 1.8458, "step": 4075 }, { "epoch": 0.02, "learning_rate": 9.988592826947182e-05, "loss": 1.835, "step": 4080 }, { "epoch": 0.02, "learning_rate": 9.988564158642439e-05, "loss": 1.8479, "step": 4085 }, { "epoch": 0.02, "learning_rate": 9.988535454399798e-05, "loss": 1.8446, "step": 4090 }, { "epoch": 0.02, "learning_rate": 9.98850671421947e-05, "loss": 1.8038, "step": 4095 }, { "epoch": 0.02, "learning_rate": 9.988477938101657e-05, "loss": 1.8214, "step": 4100 }, { "epoch": 0.02, "learning_rate": 9.988449126046569e-05, "loss": 1.7983, "step": 4105 }, { "epoch": 0.02, "learning_rate": 9.988420278054414e-05, "loss": 1.7604, "step": 4110 }, { "epoch": 0.02, "learning_rate": 9.9883913941254e-05, "loss": 1.7992, "step": 4115 }, { "epoch": 0.02, "learning_rate": 9.988362474259731e-05, "loss": 1.8404, "step": 4120 }, { "epoch": 0.02, "learning_rate": 9.988333518457623e-05, "loss": 1.8576, "step": 4125 }, { "epoch": 0.02, "learning_rate": 9.988304526719276e-05, "loss": 1.8234, "step": 4130 }, { "epoch": 0.02, "learning_rate": 9.988275499044906e-05, "loss": 1.8235, "step": 4135 }, { "epoch": 0.02, "learning_rate": 9.988246435434716e-05, "loss": 1.8128, "step": 4140 }, { "epoch": 0.02, "learning_rate": 9.98821733588892e-05, "loss": 1.7985, "step": 4145 }, { "epoch": 0.02, "learning_rate": 9.988188200407728e-05, "loss": 1.8129, "step": 4150 }, { "epoch": 0.02, "learning_rate": 9.988159028991347e-05, "loss": 1.7908, "step": 4155 }, { "epoch": 0.02, "learning_rate": 9.988129821639987e-05, "loss": 1.8077, "step": 4160 }, { "epoch": 0.02, "learning_rate": 9.98810057835386e-05, "loss": 1.7543, "step": 4165 }, { "epoch": 0.02, "learning_rate": 9.988071299133175e-05, "loss": 1.751, "step": 4170 }, { "epoch": 0.02, "learning_rate": 9.988041983978146e-05, "loss": 1.7828, "step": 4175 }, { "epoch": 0.02, "learning_rate": 9.988012632888981e-05, "loss": 1.7814, "step": 4180 }, { "epoch": 0.02, "learning_rate": 9.987983245865896e-05, "loss": 1.8086, "step": 4185 }, { "epoch": 0.02, "learning_rate": 9.987953822909095e-05, "loss": 1.7319, "step": 4190 }, { "epoch": 0.02, "learning_rate": 9.987924364018796e-05, "loss": 1.7809, "step": 4195 }, { "epoch": 0.02, "learning_rate": 9.987894869195211e-05, "loss": 1.7815, "step": 4200 }, { "epoch": 0.02, "learning_rate": 9.987865338438552e-05, "loss": 1.7543, "step": 4205 }, { "epoch": 0.02, "learning_rate": 9.987835771749028e-05, "loss": 1.7453, "step": 4210 }, { "epoch": 0.02, "learning_rate": 9.987806169126857e-05, "loss": 1.8725, "step": 4215 }, { "epoch": 0.02, "learning_rate": 9.987776530572249e-05, "loss": 1.8287, "step": 4220 }, { "epoch": 0.02, "learning_rate": 9.98774685608542e-05, "loss": 1.8613, "step": 4225 }, { "epoch": 0.02, "learning_rate": 9.987717145666582e-05, "loss": 1.7824, "step": 4230 }, { "epoch": 0.02, "learning_rate": 9.987687399315948e-05, "loss": 1.8349, "step": 4235 }, { "epoch": 0.02, "learning_rate": 9.987657617033736e-05, "loss": 1.7411, "step": 4240 }, { "epoch": 0.02, "learning_rate": 9.987627798820157e-05, "loss": 1.821, "step": 4245 }, { "epoch": 0.02, "learning_rate": 9.987597944675427e-05, "loss": 1.7545, "step": 4250 }, { "epoch": 0.02, "learning_rate": 9.987568054599762e-05, "loss": 1.8319, "step": 4255 }, { "epoch": 0.02, "learning_rate": 9.987538128593376e-05, "loss": 1.8756, "step": 4260 }, { "epoch": 0.02, "learning_rate": 9.987508166656486e-05, "loss": 1.8187, "step": 4265 }, { "epoch": 0.02, "learning_rate": 9.987478168789306e-05, "loss": 1.6989, "step": 4270 }, { "epoch": 0.02, "learning_rate": 9.987448134992053e-05, "loss": 1.8387, "step": 4275 }, { "epoch": 0.02, "learning_rate": 9.987418065264944e-05, "loss": 1.8027, "step": 4280 }, { "epoch": 0.02, "learning_rate": 9.987387959608196e-05, "loss": 1.7759, "step": 4285 }, { "epoch": 0.02, "learning_rate": 9.987357818022024e-05, "loss": 1.8326, "step": 4290 }, { "epoch": 0.02, "learning_rate": 9.987327640506646e-05, "loss": 1.7632, "step": 4295 }, { "epoch": 0.02, "learning_rate": 9.98729742706228e-05, "loss": 1.7919, "step": 4300 }, { "epoch": 0.02, "learning_rate": 9.987267177689143e-05, "loss": 1.7928, "step": 4305 }, { "epoch": 0.02, "learning_rate": 9.987236892387453e-05, "loss": 1.8197, "step": 4310 }, { "epoch": 0.02, "learning_rate": 9.987206571157428e-05, "loss": 1.7586, "step": 4315 }, { "epoch": 0.02, "learning_rate": 9.987176213999287e-05, "loss": 1.7941, "step": 4320 }, { "epoch": 0.02, "learning_rate": 9.987145820913248e-05, "loss": 1.7173, "step": 4325 }, { "epoch": 0.02, "learning_rate": 9.987115391899531e-05, "loss": 1.7833, "step": 4330 }, { "epoch": 0.02, "learning_rate": 9.987084926958355e-05, "loss": 1.8134, "step": 4335 }, { "epoch": 0.02, "learning_rate": 9.987054426089939e-05, "loss": 1.8041, "step": 4340 }, { "epoch": 0.02, "learning_rate": 9.987023889294502e-05, "loss": 1.8124, "step": 4345 }, { "epoch": 0.02, "learning_rate": 9.986993316572265e-05, "loss": 1.7733, "step": 4350 }, { "epoch": 0.02, "learning_rate": 9.986962707923449e-05, "loss": 1.8515, "step": 4355 }, { "epoch": 0.02, "learning_rate": 9.986932063348271e-05, "loss": 1.7394, "step": 4360 }, { "epoch": 0.02, "learning_rate": 9.986901382846955e-05, "loss": 1.8078, "step": 4365 }, { "epoch": 0.02, "learning_rate": 9.986870666419724e-05, "loss": 1.7497, "step": 4370 }, { "epoch": 0.02, "learning_rate": 9.986839914066795e-05, "loss": 1.7958, "step": 4375 }, { "epoch": 0.02, "learning_rate": 9.98680912578839e-05, "loss": 1.7757, "step": 4380 }, { "epoch": 0.02, "learning_rate": 9.986778301584734e-05, "loss": 1.8602, "step": 4385 }, { "epoch": 0.02, "learning_rate": 9.986747441456045e-05, "loss": 1.8535, "step": 4390 }, { "epoch": 0.02, "learning_rate": 9.986716545402548e-05, "loss": 1.7864, "step": 4395 }, { "epoch": 0.02, "learning_rate": 9.986685613424464e-05, "loss": 1.8496, "step": 4400 }, { "epoch": 0.02, "learning_rate": 9.98665464552202e-05, "loss": 1.7678, "step": 4405 }, { "epoch": 0.02, "learning_rate": 9.986623641695431e-05, "loss": 1.7678, "step": 4410 }, { "epoch": 0.02, "learning_rate": 9.986592601944928e-05, "loss": 1.8576, "step": 4415 }, { "epoch": 0.02, "learning_rate": 9.986561526270733e-05, "loss": 1.7673, "step": 4420 }, { "epoch": 0.02, "learning_rate": 9.986530414673066e-05, "loss": 1.7656, "step": 4425 }, { "epoch": 0.02, "learning_rate": 9.986499267152157e-05, "loss": 1.8152, "step": 4430 }, { "epoch": 0.02, "learning_rate": 9.986468083708223e-05, "loss": 1.7794, "step": 4435 }, { "epoch": 0.02, "learning_rate": 9.986436864341495e-05, "loss": 1.8235, "step": 4440 }, { "epoch": 0.02, "learning_rate": 9.986405609052196e-05, "loss": 1.8226, "step": 4445 }, { "epoch": 0.02, "learning_rate": 9.986374317840551e-05, "loss": 1.8177, "step": 4450 }, { "epoch": 0.02, "learning_rate": 9.986342990706785e-05, "loss": 1.8416, "step": 4455 }, { "epoch": 0.02, "learning_rate": 9.986311627651124e-05, "loss": 1.7914, "step": 4460 }, { "epoch": 0.02, "learning_rate": 9.986280228673794e-05, "loss": 1.8342, "step": 4465 }, { "epoch": 0.02, "learning_rate": 9.986248793775021e-05, "loss": 1.7609, "step": 4470 }, { "epoch": 0.02, "learning_rate": 9.986217322955032e-05, "loss": 1.8555, "step": 4475 }, { "epoch": 0.02, "learning_rate": 9.986185816214052e-05, "loss": 1.7175, "step": 4480 }, { "epoch": 0.02, "learning_rate": 9.986154273552311e-05, "loss": 1.8409, "step": 4485 }, { "epoch": 0.02, "learning_rate": 9.986122694970032e-05, "loss": 1.7931, "step": 4490 }, { "epoch": 0.02, "learning_rate": 9.986091080467447e-05, "loss": 1.7495, "step": 4495 }, { "epoch": 0.02, "learning_rate": 9.986059430044781e-05, "loss": 1.8433, "step": 4500 }, { "epoch": 0.02, "learning_rate": 9.986027743702264e-05, "loss": 1.8169, "step": 4505 }, { "epoch": 0.02, "learning_rate": 9.985996021440123e-05, "loss": 1.8428, "step": 4510 }, { "epoch": 0.02, "learning_rate": 9.985964263258585e-05, "loss": 1.841, "step": 4515 }, { "epoch": 0.02, "learning_rate": 9.985932469157882e-05, "loss": 1.7766, "step": 4520 }, { "epoch": 0.02, "learning_rate": 9.98590063913824e-05, "loss": 1.7656, "step": 4525 }, { "epoch": 0.02, "learning_rate": 9.985868773199892e-05, "loss": 1.7728, "step": 4530 }, { "epoch": 0.02, "learning_rate": 9.985836871343063e-05, "loss": 1.7047, "step": 4535 }, { "epoch": 0.02, "learning_rate": 9.985804933567986e-05, "loss": 1.7839, "step": 4540 }, { "epoch": 0.02, "learning_rate": 9.985772959874889e-05, "loss": 1.776, "step": 4545 }, { "epoch": 0.02, "learning_rate": 9.985740950264004e-05, "loss": 1.7784, "step": 4550 }, { "epoch": 0.02, "learning_rate": 9.985708904735562e-05, "loss": 1.7571, "step": 4555 }, { "epoch": 0.02, "learning_rate": 9.985676823289794e-05, "loss": 1.7435, "step": 4560 }, { "epoch": 0.02, "learning_rate": 9.985644705926928e-05, "loss": 1.8014, "step": 4565 }, { "epoch": 0.02, "learning_rate": 9.985612552647199e-05, "loss": 1.7797, "step": 4570 }, { "epoch": 0.02, "learning_rate": 9.985580363450834e-05, "loss": 1.8146, "step": 4575 }, { "epoch": 0.02, "learning_rate": 9.985548138338072e-05, "loss": 1.8263, "step": 4580 }, { "epoch": 0.02, "learning_rate": 9.98551587730914e-05, "loss": 1.8327, "step": 4585 }, { "epoch": 0.02, "learning_rate": 9.985483580364271e-05, "loss": 1.8192, "step": 4590 }, { "epoch": 0.02, "learning_rate": 9.985451247503699e-05, "loss": 1.8302, "step": 4595 }, { "epoch": 0.02, "learning_rate": 9.985418878727655e-05, "loss": 1.814, "step": 4600 }, { "epoch": 0.02, "learning_rate": 9.985386474036373e-05, "loss": 1.8311, "step": 4605 }, { "epoch": 0.02, "learning_rate": 9.985354033430088e-05, "loss": 1.7992, "step": 4610 }, { "epoch": 0.02, "learning_rate": 9.985321556909034e-05, "loss": 1.7677, "step": 4615 }, { "epoch": 0.02, "learning_rate": 9.98528904447344e-05, "loss": 1.7729, "step": 4620 }, { "epoch": 0.02, "learning_rate": 9.985256496123545e-05, "loss": 1.7827, "step": 4625 }, { "epoch": 0.03, "learning_rate": 9.985223911859582e-05, "loss": 1.681, "step": 4630 }, { "epoch": 0.03, "learning_rate": 9.985191291681787e-05, "loss": 1.8038, "step": 4635 }, { "epoch": 0.03, "learning_rate": 9.985158635590392e-05, "loss": 1.8358, "step": 4640 }, { "epoch": 0.03, "learning_rate": 9.985125943585637e-05, "loss": 1.8178, "step": 4645 }, { "epoch": 0.03, "learning_rate": 9.985093215667753e-05, "loss": 1.8244, "step": 4650 }, { "epoch": 0.03, "learning_rate": 9.985060451836977e-05, "loss": 1.7751, "step": 4655 }, { "epoch": 0.03, "learning_rate": 9.985027652093546e-05, "loss": 1.7607, "step": 4660 }, { "epoch": 0.03, "learning_rate": 9.984994816437695e-05, "loss": 1.8611, "step": 4665 }, { "epoch": 0.03, "learning_rate": 9.984961944869664e-05, "loss": 1.8574, "step": 4670 }, { "epoch": 0.03, "learning_rate": 9.984929037389686e-05, "loss": 1.809, "step": 4675 }, { "epoch": 0.03, "learning_rate": 9.984896093997997e-05, "loss": 1.7915, "step": 4680 }, { "epoch": 0.03, "learning_rate": 9.984863114694838e-05, "loss": 1.8838, "step": 4685 }, { "epoch": 0.03, "learning_rate": 9.984830099480445e-05, "loss": 1.7738, "step": 4690 }, { "epoch": 0.03, "learning_rate": 9.984797048355057e-05, "loss": 1.8362, "step": 4695 }, { "epoch": 0.03, "learning_rate": 9.98476396131891e-05, "loss": 1.8951, "step": 4700 }, { "epoch": 0.03, "learning_rate": 9.984730838372243e-05, "loss": 1.798, "step": 4705 }, { "epoch": 0.03, "learning_rate": 9.984697679515296e-05, "loss": 1.8033, "step": 4710 }, { "epoch": 0.03, "learning_rate": 9.984664484748306e-05, "loss": 1.8598, "step": 4715 }, { "epoch": 0.03, "learning_rate": 9.984631254071514e-05, "loss": 1.8139, "step": 4720 }, { "epoch": 0.03, "learning_rate": 9.984597987485158e-05, "loss": 1.8206, "step": 4725 }, { "epoch": 0.03, "learning_rate": 9.984564684989476e-05, "loss": 1.7699, "step": 4730 }, { "epoch": 0.03, "learning_rate": 9.984531346584713e-05, "loss": 1.7876, "step": 4735 }, { "epoch": 0.03, "learning_rate": 9.984497972271105e-05, "loss": 1.7865, "step": 4740 }, { "epoch": 0.03, "learning_rate": 9.984464562048894e-05, "loss": 1.8379, "step": 4745 }, { "epoch": 0.03, "learning_rate": 9.98443111591832e-05, "loss": 1.7752, "step": 4750 }, { "epoch": 0.03, "learning_rate": 9.984397633879625e-05, "loss": 1.7748, "step": 4755 }, { "epoch": 0.03, "learning_rate": 9.984364115933049e-05, "loss": 1.7872, "step": 4760 }, { "epoch": 0.03, "learning_rate": 9.984330562078835e-05, "loss": 1.7823, "step": 4765 }, { "epoch": 0.03, "learning_rate": 9.984296972317222e-05, "loss": 1.7359, "step": 4770 }, { "epoch": 0.03, "learning_rate": 9.984263346648453e-05, "loss": 1.8147, "step": 4775 }, { "epoch": 0.03, "learning_rate": 9.984229685072773e-05, "loss": 1.7765, "step": 4780 }, { "epoch": 0.03, "learning_rate": 9.98419598759042e-05, "loss": 1.8068, "step": 4785 }, { "epoch": 0.03, "learning_rate": 9.984162254201641e-05, "loss": 1.7603, "step": 4790 }, { "epoch": 0.03, "learning_rate": 9.984128484906676e-05, "loss": 1.8189, "step": 4795 }, { "epoch": 0.03, "learning_rate": 9.984094679705771e-05, "loss": 1.8301, "step": 4800 }, { "epoch": 0.03, "learning_rate": 9.984060838599167e-05, "loss": 1.7086, "step": 4805 }, { "epoch": 0.03, "learning_rate": 9.984026961587109e-05, "loss": 1.818, "step": 4810 }, { "epoch": 0.03, "learning_rate": 9.98399304866984e-05, "loss": 1.8206, "step": 4815 }, { "epoch": 0.03, "learning_rate": 9.983959099847606e-05, "loss": 1.8286, "step": 4820 }, { "epoch": 0.03, "learning_rate": 9.98392511512065e-05, "loss": 1.8376, "step": 4825 }, { "epoch": 0.03, "learning_rate": 9.983891094489217e-05, "loss": 1.7903, "step": 4830 }, { "epoch": 0.03, "learning_rate": 9.983857037953554e-05, "loss": 1.8068, "step": 4835 }, { "epoch": 0.03, "learning_rate": 9.983822945513905e-05, "loss": 1.8105, "step": 4840 }, { "epoch": 0.03, "learning_rate": 9.983788817170514e-05, "loss": 1.7582, "step": 4845 }, { "epoch": 0.03, "learning_rate": 9.98375465292363e-05, "loss": 1.837, "step": 4850 }, { "epoch": 0.03, "learning_rate": 9.983720452773497e-05, "loss": 1.8071, "step": 4855 }, { "epoch": 0.03, "learning_rate": 9.983686216720362e-05, "loss": 1.7561, "step": 4860 }, { "epoch": 0.03, "learning_rate": 9.98365194476447e-05, "loss": 1.7878, "step": 4865 }, { "epoch": 0.03, "learning_rate": 9.983617636906071e-05, "loss": 1.7472, "step": 4870 }, { "epoch": 0.03, "learning_rate": 9.983583293145411e-05, "loss": 1.8284, "step": 4875 }, { "epoch": 0.03, "learning_rate": 9.983548913482736e-05, "loss": 1.8031, "step": 4880 }, { "epoch": 0.03, "learning_rate": 9.983514497918296e-05, "loss": 1.728, "step": 4885 }, { "epoch": 0.03, "learning_rate": 9.983480046452335e-05, "loss": 1.7402, "step": 4890 }, { "epoch": 0.03, "learning_rate": 9.983445559085106e-05, "loss": 1.8168, "step": 4895 }, { "epoch": 0.03, "learning_rate": 9.983411035816855e-05, "loss": 1.7539, "step": 4900 }, { "epoch": 0.03, "learning_rate": 9.983376476647831e-05, "loss": 1.7714, "step": 4905 }, { "epoch": 0.03, "learning_rate": 9.983341881578283e-05, "loss": 1.818, "step": 4910 }, { "epoch": 0.03, "learning_rate": 9.98330725060846e-05, "loss": 1.7462, "step": 4915 }, { "epoch": 0.03, "learning_rate": 9.983272583738611e-05, "loss": 1.8592, "step": 4920 }, { "epoch": 0.03, "learning_rate": 9.983237880968986e-05, "loss": 1.8702, "step": 4925 }, { "epoch": 0.03, "learning_rate": 9.983203142299837e-05, "loss": 1.8228, "step": 4930 }, { "epoch": 0.03, "learning_rate": 9.983168367731412e-05, "loss": 1.7836, "step": 4935 }, { "epoch": 0.03, "learning_rate": 9.983133557263962e-05, "loss": 1.7666, "step": 4940 }, { "epoch": 0.03, "learning_rate": 9.983098710897739e-05, "loss": 1.776, "step": 4945 }, { "epoch": 0.03, "learning_rate": 9.98306382863299e-05, "loss": 1.9004, "step": 4950 }, { "epoch": 0.03, "learning_rate": 9.983028910469972e-05, "loss": 1.7742, "step": 4955 }, { "epoch": 0.03, "learning_rate": 9.982993956408933e-05, "loss": 1.7507, "step": 4960 }, { "epoch": 0.03, "learning_rate": 9.982958966450125e-05, "loss": 1.734, "step": 4965 }, { "epoch": 0.03, "learning_rate": 9.982923940593801e-05, "loss": 1.7275, "step": 4970 }, { "epoch": 0.03, "learning_rate": 9.982888878840214e-05, "loss": 1.8175, "step": 4975 }, { "epoch": 0.03, "learning_rate": 9.982853781189615e-05, "loss": 1.7886, "step": 4980 }, { "epoch": 0.03, "learning_rate": 9.982818647642258e-05, "loss": 1.8254, "step": 4985 }, { "epoch": 0.03, "learning_rate": 9.982783478198394e-05, "loss": 1.8108, "step": 4990 }, { "epoch": 0.03, "learning_rate": 9.98274827285828e-05, "loss": 1.7815, "step": 4995 }, { "epoch": 0.03, "learning_rate": 9.982713031622166e-05, "loss": 1.7034, "step": 5000 }, { "epoch": 0.03, "learning_rate": 9.982677754490309e-05, "loss": 1.8049, "step": 5005 }, { "epoch": 0.03, "learning_rate": 9.98264244146296e-05, "loss": 1.8438, "step": 5010 }, { "epoch": 0.03, "learning_rate": 9.982607092540375e-05, "loss": 1.6765, "step": 5015 }, { "epoch": 0.03, "learning_rate": 9.982571707722808e-05, "loss": 1.7659, "step": 5020 }, { "epoch": 0.03, "learning_rate": 9.982536287010515e-05, "loss": 1.7742, "step": 5025 }, { "epoch": 0.03, "learning_rate": 9.982500830403751e-05, "loss": 1.7312, "step": 5030 }, { "epoch": 0.03, "learning_rate": 9.982465337902773e-05, "loss": 1.8213, "step": 5035 }, { "epoch": 0.03, "learning_rate": 9.982429809507832e-05, "loss": 1.8159, "step": 5040 }, { "epoch": 0.03, "learning_rate": 9.982394245219189e-05, "loss": 1.7493, "step": 5045 }, { "epoch": 0.03, "learning_rate": 9.982358645037098e-05, "loss": 1.8145, "step": 5050 }, { "epoch": 0.03, "learning_rate": 9.982323008961813e-05, "loss": 1.8183, "step": 5055 }, { "epoch": 0.03, "learning_rate": 9.982287336993595e-05, "loss": 1.7524, "step": 5060 }, { "epoch": 0.03, "learning_rate": 9.982251629132699e-05, "loss": 1.7986, "step": 5065 }, { "epoch": 0.03, "learning_rate": 9.982215885379383e-05, "loss": 1.8462, "step": 5070 }, { "epoch": 0.03, "learning_rate": 9.982180105733902e-05, "loss": 1.8288, "step": 5075 }, { "epoch": 0.03, "learning_rate": 9.982144290196516e-05, "loss": 1.8189, "step": 5080 }, { "epoch": 0.03, "learning_rate": 9.982108438767483e-05, "loss": 1.7857, "step": 5085 }, { "epoch": 0.03, "learning_rate": 9.982072551447062e-05, "loss": 1.7768, "step": 5090 }, { "epoch": 0.03, "learning_rate": 9.982036628235509e-05, "loss": 1.7567, "step": 5095 }, { "epoch": 0.03, "learning_rate": 9.982000669133087e-05, "loss": 1.8292, "step": 5100 }, { "epoch": 0.03, "learning_rate": 9.981964674140049e-05, "loss": 1.7744, "step": 5105 }, { "epoch": 0.03, "learning_rate": 9.981928643256659e-05, "loss": 1.8271, "step": 5110 }, { "epoch": 0.03, "learning_rate": 9.981892576483176e-05, "loss": 1.7404, "step": 5115 }, { "epoch": 0.03, "learning_rate": 9.981856473819859e-05, "loss": 1.8037, "step": 5120 }, { "epoch": 0.03, "learning_rate": 9.981820335266968e-05, "loss": 1.8516, "step": 5125 }, { "epoch": 0.03, "learning_rate": 9.981784160824761e-05, "loss": 1.8874, "step": 5130 }, { "epoch": 0.03, "learning_rate": 9.981747950493504e-05, "loss": 1.7617, "step": 5135 }, { "epoch": 0.03, "learning_rate": 9.981711704273454e-05, "loss": 1.8305, "step": 5140 }, { "epoch": 0.03, "learning_rate": 9.981675422164873e-05, "loss": 1.8251, "step": 5145 }, { "epoch": 0.03, "learning_rate": 9.981639104168022e-05, "loss": 1.8758, "step": 5150 }, { "epoch": 0.03, "learning_rate": 9.981602750283163e-05, "loss": 1.7622, "step": 5155 }, { "epoch": 0.03, "learning_rate": 9.981566360510559e-05, "loss": 1.7816, "step": 5160 }, { "epoch": 0.03, "learning_rate": 9.98152993485047e-05, "loss": 1.7602, "step": 5165 }, { "epoch": 0.03, "learning_rate": 9.981493473303161e-05, "loss": 1.8168, "step": 5170 }, { "epoch": 0.03, "learning_rate": 9.981456975868891e-05, "loss": 1.8661, "step": 5175 }, { "epoch": 0.03, "learning_rate": 9.981420442547926e-05, "loss": 1.8342, "step": 5180 }, { "epoch": 0.03, "learning_rate": 9.981383873340528e-05, "loss": 1.798, "step": 5185 }, { "epoch": 0.03, "learning_rate": 9.981347268246962e-05, "loss": 1.7758, "step": 5190 }, { "epoch": 0.03, "learning_rate": 9.98131062726749e-05, "loss": 1.7259, "step": 5195 }, { "epoch": 0.03, "learning_rate": 9.981273950402377e-05, "loss": 1.8599, "step": 5200 }, { "epoch": 0.03, "learning_rate": 9.981237237651886e-05, "loss": 1.7726, "step": 5205 }, { "epoch": 0.03, "learning_rate": 9.981200489016282e-05, "loss": 1.7315, "step": 5210 }, { "epoch": 0.03, "learning_rate": 9.98116370449583e-05, "loss": 1.7804, "step": 5215 }, { "epoch": 0.03, "learning_rate": 9.981126884090795e-05, "loss": 1.7867, "step": 5220 }, { "epoch": 0.03, "learning_rate": 9.98109002780144e-05, "loss": 1.7916, "step": 5225 }, { "epoch": 0.03, "learning_rate": 9.981053135628035e-05, "loss": 1.8357, "step": 5230 }, { "epoch": 0.03, "learning_rate": 9.981016207570844e-05, "loss": 1.7947, "step": 5235 }, { "epoch": 0.03, "learning_rate": 9.98097924363013e-05, "loss": 1.7658, "step": 5240 }, { "epoch": 0.03, "learning_rate": 9.980942243806164e-05, "loss": 1.8477, "step": 5245 }, { "epoch": 0.03, "learning_rate": 9.98090520809921e-05, "loss": 1.7399, "step": 5250 }, { "epoch": 0.03, "learning_rate": 9.980868136509533e-05, "loss": 1.8349, "step": 5255 }, { "epoch": 0.03, "learning_rate": 9.980831029037405e-05, "loss": 1.8178, "step": 5260 }, { "epoch": 0.03, "learning_rate": 9.980793885683089e-05, "loss": 1.7942, "step": 5265 }, { "epoch": 0.03, "learning_rate": 9.980756706446854e-05, "loss": 1.8385, "step": 5270 }, { "epoch": 0.03, "learning_rate": 9.980719491328968e-05, "loss": 1.825, "step": 5275 }, { "epoch": 0.03, "learning_rate": 9.980682240329699e-05, "loss": 1.8927, "step": 5280 }, { "epoch": 0.03, "learning_rate": 9.980644953449317e-05, "loss": 1.794, "step": 5285 }, { "epoch": 0.03, "learning_rate": 9.980607630688086e-05, "loss": 1.7455, "step": 5290 }, { "epoch": 0.03, "learning_rate": 9.980570272046279e-05, "loss": 1.8264, "step": 5295 }, { "epoch": 0.03, "learning_rate": 9.980532877524165e-05, "loss": 1.7617, "step": 5300 }, { "epoch": 0.03, "learning_rate": 9.980495447122011e-05, "loss": 1.7343, "step": 5305 }, { "epoch": 0.03, "learning_rate": 9.980457980840088e-05, "loss": 1.6987, "step": 5310 }, { "epoch": 0.03, "learning_rate": 9.980420478678667e-05, "loss": 1.8103, "step": 5315 }, { "epoch": 0.03, "learning_rate": 9.980382940638018e-05, "loss": 1.7961, "step": 5320 }, { "epoch": 0.03, "learning_rate": 9.98034536671841e-05, "loss": 1.7676, "step": 5325 }, { "epoch": 0.03, "learning_rate": 9.980307756920112e-05, "loss": 1.7728, "step": 5330 }, { "epoch": 0.03, "learning_rate": 9.9802701112434e-05, "loss": 1.7633, "step": 5335 }, { "epoch": 0.03, "learning_rate": 9.98023242968854e-05, "loss": 1.8056, "step": 5340 }, { "epoch": 0.03, "learning_rate": 9.980194712255806e-05, "loss": 1.8289, "step": 5345 }, { "epoch": 0.03, "learning_rate": 9.98015695894547e-05, "loss": 1.7398, "step": 5350 }, { "epoch": 0.03, "learning_rate": 9.980119169757805e-05, "loss": 1.7954, "step": 5355 }, { "epoch": 0.03, "learning_rate": 9.98008134469308e-05, "loss": 1.7978, "step": 5360 }, { "epoch": 0.03, "learning_rate": 9.980043483751571e-05, "loss": 1.6968, "step": 5365 }, { "epoch": 0.03, "learning_rate": 9.980005586933547e-05, "loss": 1.7691, "step": 5370 }, { "epoch": 0.03, "learning_rate": 9.979967654239284e-05, "loss": 1.7622, "step": 5375 }, { "epoch": 0.03, "learning_rate": 9.979929685669054e-05, "loss": 1.8052, "step": 5380 }, { "epoch": 0.03, "learning_rate": 9.979891681223131e-05, "loss": 1.8023, "step": 5385 }, { "epoch": 0.03, "learning_rate": 9.979853640901788e-05, "loss": 1.7941, "step": 5390 }, { "epoch": 0.03, "learning_rate": 9.979815564705302e-05, "loss": 1.7377, "step": 5395 }, { "epoch": 0.03, "learning_rate": 9.979777452633943e-05, "loss": 1.8067, "step": 5400 }, { "epoch": 0.03, "learning_rate": 9.979739304687987e-05, "loss": 1.7322, "step": 5405 }, { "epoch": 0.03, "learning_rate": 9.97970112086771e-05, "loss": 1.7198, "step": 5410 }, { "epoch": 0.03, "learning_rate": 9.979662901173387e-05, "loss": 1.8021, "step": 5415 }, { "epoch": 0.03, "learning_rate": 9.979624645605292e-05, "loss": 1.8074, "step": 5420 }, { "epoch": 0.03, "learning_rate": 9.979586354163702e-05, "loss": 1.7696, "step": 5425 }, { "epoch": 0.03, "learning_rate": 9.979548026848891e-05, "loss": 1.7886, "step": 5430 }, { "epoch": 0.03, "learning_rate": 9.979509663661137e-05, "loss": 1.7774, "step": 5435 }, { "epoch": 0.03, "learning_rate": 9.979471264600715e-05, "loss": 1.8361, "step": 5440 }, { "epoch": 0.03, "learning_rate": 9.979432829667903e-05, "loss": 1.8218, "step": 5445 }, { "epoch": 0.03, "learning_rate": 9.979394358862977e-05, "loss": 1.7533, "step": 5450 }, { "epoch": 0.03, "learning_rate": 9.979355852186214e-05, "loss": 1.8781, "step": 5455 }, { "epoch": 0.03, "learning_rate": 9.979317309637892e-05, "loss": 1.812, "step": 5460 }, { "epoch": 0.03, "learning_rate": 9.979278731218289e-05, "loss": 1.7464, "step": 5465 }, { "epoch": 0.03, "learning_rate": 9.979240116927682e-05, "loss": 1.7449, "step": 5470 }, { "epoch": 0.03, "learning_rate": 9.97920146676635e-05, "loss": 1.6991, "step": 5475 }, { "epoch": 0.03, "learning_rate": 9.97916278073457e-05, "loss": 1.8229, "step": 5480 }, { "epoch": 0.03, "learning_rate": 9.979124058832623e-05, "loss": 1.7238, "step": 5485 }, { "epoch": 0.03, "learning_rate": 9.979085301060785e-05, "loss": 1.8166, "step": 5490 }, { "epoch": 0.03, "learning_rate": 9.979046507419337e-05, "loss": 1.8683, "step": 5495 }, { "epoch": 0.03, "learning_rate": 9.979007677908559e-05, "loss": 1.7632, "step": 5500 }, { "epoch": 0.03, "learning_rate": 9.978968812528729e-05, "loss": 1.8529, "step": 5505 }, { "epoch": 0.03, "learning_rate": 9.978929911280128e-05, "loss": 1.799, "step": 5510 }, { "epoch": 0.03, "learning_rate": 9.978890974163038e-05, "loss": 1.6964, "step": 5515 }, { "epoch": 0.03, "learning_rate": 9.978852001177736e-05, "loss": 1.7772, "step": 5520 }, { "epoch": 0.03, "learning_rate": 9.978812992324506e-05, "loss": 1.7691, "step": 5525 }, { "epoch": 0.03, "learning_rate": 9.978773947603626e-05, "loss": 1.8673, "step": 5530 }, { "epoch": 0.03, "learning_rate": 9.97873486701538e-05, "loss": 1.8249, "step": 5535 }, { "epoch": 0.03, "learning_rate": 9.978695750560048e-05, "loss": 1.7985, "step": 5540 }, { "epoch": 0.03, "learning_rate": 9.978656598237912e-05, "loss": 1.8155, "step": 5545 }, { "epoch": 0.03, "learning_rate": 9.978617410049254e-05, "loss": 1.7811, "step": 5550 }, { "epoch": 0.03, "learning_rate": 9.978578185994356e-05, "loss": 1.8012, "step": 5555 }, { "epoch": 0.03, "learning_rate": 9.978538926073502e-05, "loss": 1.7708, "step": 5560 }, { "epoch": 0.03, "learning_rate": 9.978499630286973e-05, "loss": 1.7376, "step": 5565 }, { "epoch": 0.03, "learning_rate": 9.978460298635055e-05, "loss": 1.7735, "step": 5570 }, { "epoch": 0.03, "learning_rate": 9.978420931118028e-05, "loss": 1.7435, "step": 5575 }, { "epoch": 0.03, "learning_rate": 9.978381527736176e-05, "loss": 1.8387, "step": 5580 }, { "epoch": 0.03, "learning_rate": 9.978342088489785e-05, "loss": 1.8615, "step": 5585 }, { "epoch": 0.03, "learning_rate": 9.978302613379138e-05, "loss": 1.7853, "step": 5590 }, { "epoch": 0.03, "learning_rate": 9.978263102404519e-05, "loss": 1.7539, "step": 5595 }, { "epoch": 0.03, "learning_rate": 9.978223555566214e-05, "loss": 1.7715, "step": 5600 }, { "epoch": 0.03, "learning_rate": 9.978183972864506e-05, "loss": 1.8332, "step": 5605 }, { "epoch": 0.03, "learning_rate": 9.97814435429968e-05, "loss": 1.7915, "step": 5610 }, { "epoch": 0.03, "learning_rate": 9.978104699872024e-05, "loss": 1.753, "step": 5615 }, { "epoch": 0.03, "learning_rate": 9.978065009581821e-05, "loss": 1.7694, "step": 5620 }, { "epoch": 0.03, "learning_rate": 9.978025283429359e-05, "loss": 1.7816, "step": 5625 }, { "epoch": 0.03, "learning_rate": 9.977985521414923e-05, "loss": 1.7629, "step": 5630 }, { "epoch": 0.03, "learning_rate": 9.9779457235388e-05, "loss": 1.7797, "step": 5635 }, { "epoch": 0.03, "learning_rate": 9.977905889801275e-05, "loss": 1.8199, "step": 5640 }, { "epoch": 0.03, "learning_rate": 9.977866020202638e-05, "loss": 1.8683, "step": 5645 }, { "epoch": 0.03, "learning_rate": 9.977826114743173e-05, "loss": 1.7849, "step": 5650 }, { "epoch": 0.03, "learning_rate": 9.977786173423169e-05, "loss": 1.7483, "step": 5655 }, { "epoch": 0.03, "learning_rate": 9.977746196242914e-05, "loss": 1.7762, "step": 5660 }, { "epoch": 0.03, "learning_rate": 9.977706183202697e-05, "loss": 1.7846, "step": 5665 }, { "epoch": 0.03, "learning_rate": 9.977666134302806e-05, "loss": 1.778, "step": 5670 }, { "epoch": 0.03, "learning_rate": 9.977626049543526e-05, "loss": 1.8515, "step": 5675 }, { "epoch": 0.03, "learning_rate": 9.977585928925149e-05, "loss": 1.7685, "step": 5680 }, { "epoch": 0.03, "learning_rate": 9.977545772447964e-05, "loss": 1.7786, "step": 5685 }, { "epoch": 0.03, "learning_rate": 9.977505580112258e-05, "loss": 1.7679, "step": 5690 }, { "epoch": 0.03, "learning_rate": 9.977465351918325e-05, "loss": 1.7657, "step": 5695 }, { "epoch": 0.03, "learning_rate": 9.977425087866449e-05, "loss": 1.8085, "step": 5700 }, { "epoch": 0.03, "learning_rate": 9.977384787956924e-05, "loss": 1.7745, "step": 5705 }, { "epoch": 0.03, "learning_rate": 9.977344452190042e-05, "loss": 1.8668, "step": 5710 }, { "epoch": 0.03, "learning_rate": 9.977304080566089e-05, "loss": 1.8124, "step": 5715 }, { "epoch": 0.03, "learning_rate": 9.977263673085357e-05, "loss": 1.7824, "step": 5720 }, { "epoch": 0.03, "learning_rate": 9.977223229748139e-05, "loss": 1.86, "step": 5725 }, { "epoch": 0.03, "learning_rate": 9.977182750554726e-05, "loss": 1.8216, "step": 5730 }, { "epoch": 0.03, "learning_rate": 9.977142235505407e-05, "loss": 1.8624, "step": 5735 }, { "epoch": 0.03, "learning_rate": 9.977101684600477e-05, "loss": 1.7334, "step": 5740 }, { "epoch": 0.03, "learning_rate": 9.977061097840226e-05, "loss": 1.8007, "step": 5745 }, { "epoch": 0.03, "learning_rate": 9.977020475224947e-05, "loss": 1.7465, "step": 5750 }, { "epoch": 0.03, "learning_rate": 9.976979816754935e-05, "loss": 1.7035, "step": 5755 }, { "epoch": 0.03, "learning_rate": 9.97693912243048e-05, "loss": 1.7664, "step": 5760 }, { "epoch": 0.03, "learning_rate": 9.976898392251875e-05, "loss": 1.7293, "step": 5765 }, { "epoch": 0.03, "learning_rate": 9.976857626219414e-05, "loss": 1.7946, "step": 5770 }, { "epoch": 0.03, "learning_rate": 9.976816824333392e-05, "loss": 1.812, "step": 5775 }, { "epoch": 0.03, "learning_rate": 9.976775986594102e-05, "loss": 1.7573, "step": 5780 }, { "epoch": 0.03, "learning_rate": 9.976735113001838e-05, "loss": 1.7939, "step": 5785 }, { "epoch": 0.03, "learning_rate": 9.976694203556896e-05, "loss": 1.8081, "step": 5790 }, { "epoch": 0.03, "learning_rate": 9.976653258259568e-05, "loss": 1.7856, "step": 5795 }, { "epoch": 0.03, "learning_rate": 9.97661227711015e-05, "loss": 1.7683, "step": 5800 }, { "epoch": 0.03, "learning_rate": 9.97657126010894e-05, "loss": 1.7836, "step": 5805 }, { "epoch": 0.03, "learning_rate": 9.97653020725623e-05, "loss": 1.8141, "step": 5810 }, { "epoch": 0.03, "learning_rate": 9.976489118552315e-05, "loss": 1.8185, "step": 5815 }, { "epoch": 0.03, "learning_rate": 9.976447993997494e-05, "loss": 1.7804, "step": 5820 }, { "epoch": 0.03, "learning_rate": 9.976406833592062e-05, "loss": 1.8226, "step": 5825 }, { "epoch": 0.03, "learning_rate": 9.976365637336316e-05, "loss": 1.8621, "step": 5830 }, { "epoch": 0.03, "learning_rate": 9.976324405230553e-05, "loss": 1.8467, "step": 5835 }, { "epoch": 0.03, "learning_rate": 9.976283137275068e-05, "loss": 1.8133, "step": 5840 }, { "epoch": 0.03, "learning_rate": 9.976241833470161e-05, "loss": 1.8176, "step": 5845 }, { "epoch": 0.03, "learning_rate": 9.976200493816127e-05, "loss": 1.6956, "step": 5850 }, { "epoch": 0.03, "learning_rate": 9.976159118313266e-05, "loss": 1.8148, "step": 5855 }, { "epoch": 0.03, "learning_rate": 9.976117706961876e-05, "loss": 1.7799, "step": 5860 }, { "epoch": 0.03, "learning_rate": 9.976076259762252e-05, "loss": 1.8063, "step": 5865 }, { "epoch": 0.03, "learning_rate": 9.976034776714698e-05, "loss": 1.7805, "step": 5870 }, { "epoch": 0.03, "learning_rate": 9.975993257819509e-05, "loss": 1.7492, "step": 5875 }, { "epoch": 0.03, "learning_rate": 9.975951703076985e-05, "loss": 1.8141, "step": 5880 }, { "epoch": 0.03, "learning_rate": 9.975910112487424e-05, "loss": 1.8381, "step": 5885 }, { "epoch": 0.03, "learning_rate": 9.975868486051128e-05, "loss": 1.7599, "step": 5890 }, { "epoch": 0.03, "learning_rate": 9.975826823768397e-05, "loss": 1.7494, "step": 5895 }, { "epoch": 0.03, "learning_rate": 9.97578512563953e-05, "loss": 1.7894, "step": 5900 }, { "epoch": 0.03, "learning_rate": 9.975743391664826e-05, "loss": 1.7237, "step": 5905 }, { "epoch": 0.03, "learning_rate": 9.975701621844587e-05, "loss": 1.743, "step": 5910 }, { "epoch": 0.03, "learning_rate": 9.975659816179115e-05, "loss": 1.8045, "step": 5915 }, { "epoch": 0.03, "learning_rate": 9.97561797466871e-05, "loss": 1.6844, "step": 5920 }, { "epoch": 0.03, "learning_rate": 9.975576097313674e-05, "loss": 1.7342, "step": 5925 }, { "epoch": 0.03, "learning_rate": 9.975534184114308e-05, "loss": 1.7454, "step": 5930 }, { "epoch": 0.03, "learning_rate": 9.975492235070914e-05, "loss": 1.8037, "step": 5935 }, { "epoch": 0.03, "learning_rate": 9.975450250183793e-05, "loss": 1.7829, "step": 5940 }, { "epoch": 0.03, "learning_rate": 9.975408229453251e-05, "loss": 1.7787, "step": 5945 }, { "epoch": 0.03, "learning_rate": 9.975366172879588e-05, "loss": 1.8243, "step": 5950 }, { "epoch": 0.03, "learning_rate": 9.975324080463108e-05, "loss": 1.7313, "step": 5955 }, { "epoch": 0.03, "learning_rate": 9.975281952204115e-05, "loss": 1.8594, "step": 5960 }, { "epoch": 0.03, "learning_rate": 9.975239788102908e-05, "loss": 1.8273, "step": 5965 }, { "epoch": 0.03, "learning_rate": 9.975197588159797e-05, "loss": 1.7804, "step": 5970 }, { "epoch": 0.03, "learning_rate": 9.975155352375082e-05, "loss": 1.8493, "step": 5975 }, { "epoch": 0.03, "learning_rate": 9.975113080749068e-05, "loss": 1.8164, "step": 5980 }, { "epoch": 0.03, "learning_rate": 9.97507077328206e-05, "loss": 1.7224, "step": 5985 }, { "epoch": 0.03, "learning_rate": 9.975028429974363e-05, "loss": 1.7857, "step": 5990 }, { "epoch": 0.03, "learning_rate": 9.974986050826283e-05, "loss": 1.8197, "step": 5995 }, { "epoch": 0.03, "learning_rate": 9.974943635838121e-05, "loss": 1.7624, "step": 6000 }, { "epoch": 0.03, "learning_rate": 9.974901185010188e-05, "loss": 1.7499, "step": 6005 }, { "epoch": 0.03, "learning_rate": 9.974858698342787e-05, "loss": 1.8885, "step": 6010 }, { "epoch": 0.03, "learning_rate": 9.974816175836225e-05, "loss": 1.7028, "step": 6015 }, { "epoch": 0.03, "learning_rate": 9.974773617490807e-05, "loss": 1.7452, "step": 6020 }, { "epoch": 0.03, "learning_rate": 9.974731023306841e-05, "loss": 1.7934, "step": 6025 }, { "epoch": 0.03, "learning_rate": 9.974688393284633e-05, "loss": 1.7857, "step": 6030 }, { "epoch": 0.03, "learning_rate": 9.97464572742449e-05, "loss": 1.7688, "step": 6035 }, { "epoch": 0.03, "learning_rate": 9.974603025726718e-05, "loss": 1.808, "step": 6040 }, { "epoch": 0.03, "learning_rate": 9.97456028819163e-05, "loss": 1.8324, "step": 6045 }, { "epoch": 0.03, "learning_rate": 9.974517514819528e-05, "loss": 1.7574, "step": 6050 }, { "epoch": 0.03, "learning_rate": 9.974474705610724e-05, "loss": 1.7991, "step": 6055 }, { "epoch": 0.03, "learning_rate": 9.974431860565523e-05, "loss": 1.7949, "step": 6060 }, { "epoch": 0.03, "learning_rate": 9.974388979684235e-05, "loss": 1.8316, "step": 6065 }, { "epoch": 0.03, "learning_rate": 9.97434606296717e-05, "loss": 1.8062, "step": 6070 }, { "epoch": 0.03, "learning_rate": 9.974303110414638e-05, "loss": 1.8587, "step": 6075 }, { "epoch": 0.03, "learning_rate": 9.974260122026946e-05, "loss": 1.7245, "step": 6080 }, { "epoch": 0.03, "learning_rate": 9.974217097804404e-05, "loss": 1.8185, "step": 6085 }, { "epoch": 0.03, "learning_rate": 9.974174037747323e-05, "loss": 1.7619, "step": 6090 }, { "epoch": 0.03, "learning_rate": 9.974130941856014e-05, "loss": 1.7714, "step": 6095 }, { "epoch": 0.03, "learning_rate": 9.974087810130786e-05, "loss": 1.7464, "step": 6100 }, { "epoch": 0.03, "learning_rate": 9.974044642571949e-05, "loss": 1.8256, "step": 6105 }, { "epoch": 0.03, "learning_rate": 9.974001439179815e-05, "loss": 1.839, "step": 6110 }, { "epoch": 0.03, "learning_rate": 9.973958199954696e-05, "loss": 1.7836, "step": 6115 }, { "epoch": 0.03, "learning_rate": 9.973914924896902e-05, "loss": 1.782, "step": 6120 }, { "epoch": 0.03, "learning_rate": 9.973871614006747e-05, "loss": 1.75, "step": 6125 }, { "epoch": 0.03, "learning_rate": 9.973828267284539e-05, "loss": 1.7444, "step": 6130 }, { "epoch": 0.03, "learning_rate": 9.973784884730595e-05, "loss": 1.782, "step": 6135 }, { "epoch": 0.03, "learning_rate": 9.973741466345223e-05, "loss": 1.8108, "step": 6140 }, { "epoch": 0.03, "learning_rate": 9.97369801212874e-05, "loss": 1.8058, "step": 6145 }, { "epoch": 0.03, "learning_rate": 9.973654522081456e-05, "loss": 1.7934, "step": 6150 }, { "epoch": 0.03, "learning_rate": 9.973610996203687e-05, "loss": 1.747, "step": 6155 }, { "epoch": 0.03, "learning_rate": 9.973567434495744e-05, "loss": 1.7796, "step": 6160 }, { "epoch": 0.03, "learning_rate": 9.973523836957943e-05, "loss": 1.7818, "step": 6165 }, { "epoch": 0.03, "learning_rate": 9.973480203590596e-05, "loss": 1.8186, "step": 6170 }, { "epoch": 0.03, "learning_rate": 9.973436534394016e-05, "loss": 1.7631, "step": 6175 }, { "epoch": 0.03, "learning_rate": 9.973392829368522e-05, "loss": 1.747, "step": 6180 }, { "epoch": 0.03, "learning_rate": 9.973349088514428e-05, "loss": 1.7816, "step": 6185 }, { "epoch": 0.03, "learning_rate": 9.973305311832045e-05, "loss": 1.8195, "step": 6190 }, { "epoch": 0.03, "learning_rate": 9.973261499321693e-05, "loss": 1.8204, "step": 6195 }, { "epoch": 0.03, "learning_rate": 9.973217650983684e-05, "loss": 1.8025, "step": 6200 }, { "epoch": 0.03, "learning_rate": 9.973173766818338e-05, "loss": 1.8189, "step": 6205 }, { "epoch": 0.03, "learning_rate": 9.973129846825968e-05, "loss": 1.824, "step": 6210 }, { "epoch": 0.03, "learning_rate": 9.97308589100689e-05, "loss": 1.7898, "step": 6215 }, { "epoch": 0.03, "learning_rate": 9.973041899361424e-05, "loss": 1.7882, "step": 6220 }, { "epoch": 0.03, "learning_rate": 9.972997871889885e-05, "loss": 1.796, "step": 6225 }, { "epoch": 0.03, "learning_rate": 9.972953808592587e-05, "loss": 1.7618, "step": 6230 }, { "epoch": 0.03, "learning_rate": 9.972909709469853e-05, "loss": 1.8597, "step": 6235 }, { "epoch": 0.03, "learning_rate": 9.972865574521997e-05, "loss": 1.8124, "step": 6240 }, { "epoch": 0.03, "learning_rate": 9.972821403749337e-05, "loss": 1.6938, "step": 6245 }, { "epoch": 0.03, "learning_rate": 9.972777197152195e-05, "loss": 1.7943, "step": 6250 }, { "epoch": 0.03, "learning_rate": 9.972732954730885e-05, "loss": 1.7531, "step": 6255 }, { "epoch": 0.03, "learning_rate": 9.972688676485729e-05, "loss": 1.7586, "step": 6260 }, { "epoch": 0.03, "learning_rate": 9.972644362417041e-05, "loss": 1.7823, "step": 6265 }, { "epoch": 0.03, "learning_rate": 9.972600012525147e-05, "loss": 1.7693, "step": 6270 }, { "epoch": 0.03, "learning_rate": 9.972555626810363e-05, "loss": 1.7375, "step": 6275 }, { "epoch": 0.03, "learning_rate": 9.972511205273008e-05, "loss": 1.7785, "step": 6280 }, { "epoch": 0.03, "learning_rate": 9.972466747913403e-05, "loss": 1.7256, "step": 6285 }, { "epoch": 0.03, "learning_rate": 9.972422254731868e-05, "loss": 1.8132, "step": 6290 }, { "epoch": 0.03, "learning_rate": 9.972377725728723e-05, "loss": 1.7533, "step": 6295 }, { "epoch": 0.03, "learning_rate": 9.97233316090429e-05, "loss": 1.8471, "step": 6300 }, { "epoch": 0.03, "learning_rate": 9.972288560258891e-05, "loss": 1.7695, "step": 6305 }, { "epoch": 0.03, "learning_rate": 9.972243923792845e-05, "loss": 1.7784, "step": 6310 }, { "epoch": 0.03, "learning_rate": 9.972199251506474e-05, "loss": 1.7222, "step": 6315 }, { "epoch": 0.03, "learning_rate": 9.9721545434001e-05, "loss": 1.7869, "step": 6320 }, { "epoch": 0.03, "learning_rate": 9.972109799474047e-05, "loss": 1.803, "step": 6325 }, { "epoch": 0.03, "learning_rate": 9.972065019728635e-05, "loss": 1.7686, "step": 6330 }, { "epoch": 0.03, "learning_rate": 9.972020204164188e-05, "loss": 1.6806, "step": 6335 }, { "epoch": 0.03, "learning_rate": 9.971975352781027e-05, "loss": 1.7756, "step": 6340 }, { "epoch": 0.03, "learning_rate": 9.971930465579477e-05, "loss": 1.7997, "step": 6345 }, { "epoch": 0.03, "learning_rate": 9.97188554255986e-05, "loss": 1.7374, "step": 6350 }, { "epoch": 0.03, "learning_rate": 9.9718405837225e-05, "loss": 1.8606, "step": 6355 }, { "epoch": 0.03, "learning_rate": 9.971795589067722e-05, "loss": 1.758, "step": 6360 }, { "epoch": 0.03, "learning_rate": 9.97175055859585e-05, "loss": 1.7222, "step": 6365 }, { "epoch": 0.03, "learning_rate": 9.971705492307207e-05, "loss": 1.7546, "step": 6370 }, { "epoch": 0.03, "learning_rate": 9.971660390202117e-05, "loss": 1.8534, "step": 6375 }, { "epoch": 0.03, "learning_rate": 9.971615252280906e-05, "loss": 1.8117, "step": 6380 }, { "epoch": 0.03, "learning_rate": 9.971570078543901e-05, "loss": 1.8004, "step": 6385 }, { "epoch": 0.03, "learning_rate": 9.971524868991426e-05, "loss": 1.8051, "step": 6390 }, { "epoch": 0.03, "learning_rate": 9.971479623623806e-05, "loss": 1.7009, "step": 6395 }, { "epoch": 0.03, "learning_rate": 9.971434342441368e-05, "loss": 1.7664, "step": 6400 }, { "epoch": 0.03, "learning_rate": 9.971389025444438e-05, "loss": 1.8028, "step": 6405 }, { "epoch": 0.03, "learning_rate": 9.971343672633341e-05, "loss": 1.8005, "step": 6410 }, { "epoch": 0.03, "learning_rate": 9.971298284008405e-05, "loss": 1.7459, "step": 6415 }, { "epoch": 0.03, "learning_rate": 9.971252859569958e-05, "loss": 1.7763, "step": 6420 }, { "epoch": 0.03, "learning_rate": 9.971207399318325e-05, "loss": 1.751, "step": 6425 }, { "epoch": 0.03, "learning_rate": 9.971161903253836e-05, "loss": 1.777, "step": 6430 }, { "epoch": 0.03, "learning_rate": 9.971116371376816e-05, "loss": 1.7104, "step": 6435 }, { "epoch": 0.03, "learning_rate": 9.971070803687596e-05, "loss": 1.7961, "step": 6440 }, { "epoch": 0.03, "learning_rate": 9.9710252001865e-05, "loss": 1.7361, "step": 6445 }, { "epoch": 0.03, "learning_rate": 9.970979560873863e-05, "loss": 1.7787, "step": 6450 }, { "epoch": 0.03, "learning_rate": 9.970933885750007e-05, "loss": 1.7927, "step": 6455 }, { "epoch": 0.03, "learning_rate": 9.970888174815266e-05, "loss": 1.7626, "step": 6460 }, { "epoch": 0.03, "learning_rate": 9.970842428069966e-05, "loss": 1.7792, "step": 6465 }, { "epoch": 0.03, "learning_rate": 9.97079664551444e-05, "loss": 1.8233, "step": 6470 }, { "epoch": 0.03, "learning_rate": 9.970750827149014e-05, "loss": 1.784, "step": 6475 }, { "epoch": 0.03, "learning_rate": 9.970704972974018e-05, "loss": 1.7238, "step": 6480 }, { "epoch": 0.04, "learning_rate": 9.970659082989788e-05, "loss": 1.7404, "step": 6485 }, { "epoch": 0.04, "learning_rate": 9.97061315719665e-05, "loss": 1.7598, "step": 6490 }, { "epoch": 0.04, "learning_rate": 9.970567195594933e-05, "loss": 1.7321, "step": 6495 }, { "epoch": 0.04, "learning_rate": 9.970521198184973e-05, "loss": 1.84, "step": 6500 }, { "epoch": 0.04, "learning_rate": 9.970475164967096e-05, "loss": 1.7691, "step": 6505 }, { "epoch": 0.04, "learning_rate": 9.97042909594164e-05, "loss": 1.7623, "step": 6510 }, { "epoch": 0.04, "learning_rate": 9.970382991108933e-05, "loss": 1.7986, "step": 6515 }, { "epoch": 0.04, "learning_rate": 9.970336850469306e-05, "loss": 1.7722, "step": 6520 }, { "epoch": 0.04, "learning_rate": 9.970290674023096e-05, "loss": 1.727, "step": 6525 }, { "epoch": 0.04, "learning_rate": 9.97024446177063e-05, "loss": 1.7553, "step": 6530 }, { "epoch": 0.04, "learning_rate": 9.970198213712246e-05, "loss": 1.7261, "step": 6535 }, { "epoch": 0.04, "learning_rate": 9.970151929848273e-05, "loss": 1.769, "step": 6540 }, { "epoch": 0.04, "learning_rate": 9.970105610179048e-05, "loss": 1.7422, "step": 6545 }, { "epoch": 0.04, "learning_rate": 9.970059254704902e-05, "loss": 1.7891, "step": 6550 }, { "epoch": 0.04, "learning_rate": 9.97001286342617e-05, "loss": 1.877, "step": 6555 }, { "epoch": 0.04, "learning_rate": 9.969966436343186e-05, "loss": 1.7496, "step": 6560 }, { "epoch": 0.04, "learning_rate": 9.969919973456286e-05, "loss": 1.7567, "step": 6565 }, { "epoch": 0.04, "learning_rate": 9.969873474765801e-05, "loss": 1.7506, "step": 6570 }, { "epoch": 0.04, "learning_rate": 9.969826940272069e-05, "loss": 1.7568, "step": 6575 }, { "epoch": 0.04, "learning_rate": 9.969780369975425e-05, "loss": 1.687, "step": 6580 }, { "epoch": 0.04, "learning_rate": 9.969733763876205e-05, "loss": 1.781, "step": 6585 }, { "epoch": 0.04, "learning_rate": 9.969687121974743e-05, "loss": 1.7449, "step": 6590 }, { "epoch": 0.04, "learning_rate": 9.969640444271375e-05, "loss": 1.7667, "step": 6595 }, { "epoch": 0.04, "learning_rate": 9.969593730766439e-05, "loss": 1.7642, "step": 6600 }, { "epoch": 0.04, "learning_rate": 9.96954698146027e-05, "loss": 1.7595, "step": 6605 }, { "epoch": 0.04, "learning_rate": 9.969500196353205e-05, "loss": 1.8087, "step": 6610 }, { "epoch": 0.04, "learning_rate": 9.969453375445583e-05, "loss": 1.8301, "step": 6615 }, { "epoch": 0.04, "learning_rate": 9.969406518737739e-05, "loss": 1.7616, "step": 6620 }, { "epoch": 0.04, "learning_rate": 9.969359626230011e-05, "loss": 1.8418, "step": 6625 }, { "epoch": 0.04, "learning_rate": 9.969312697922737e-05, "loss": 1.7077, "step": 6630 }, { "epoch": 0.04, "learning_rate": 9.969265733816256e-05, "loss": 1.7298, "step": 6635 }, { "epoch": 0.04, "learning_rate": 9.969218733910905e-05, "loss": 1.7844, "step": 6640 }, { "epoch": 0.04, "learning_rate": 9.969171698207022e-05, "loss": 1.79, "step": 6645 }, { "epoch": 0.04, "learning_rate": 9.969124626704948e-05, "loss": 1.7653, "step": 6650 }, { "epoch": 0.04, "learning_rate": 9.969077519405023e-05, "loss": 1.7379, "step": 6655 }, { "epoch": 0.04, "learning_rate": 9.969030376307583e-05, "loss": 1.7238, "step": 6660 }, { "epoch": 0.04, "learning_rate": 9.968983197412967e-05, "loss": 1.7455, "step": 6665 }, { "epoch": 0.04, "learning_rate": 9.968935982721518e-05, "loss": 1.7896, "step": 6670 }, { "epoch": 0.04, "learning_rate": 9.968888732233576e-05, "loss": 1.7281, "step": 6675 }, { "epoch": 0.04, "learning_rate": 9.968841445949481e-05, "loss": 1.723, "step": 6680 }, { "epoch": 0.04, "learning_rate": 9.968794123869572e-05, "loss": 1.8517, "step": 6685 }, { "epoch": 0.04, "learning_rate": 9.968746765994191e-05, "loss": 1.7406, "step": 6690 }, { "epoch": 0.04, "learning_rate": 9.96869937232368e-05, "loss": 1.7641, "step": 6695 }, { "epoch": 0.04, "learning_rate": 9.968651942858378e-05, "loss": 1.7761, "step": 6700 }, { "epoch": 0.04, "learning_rate": 9.96860447759863e-05, "loss": 1.8071, "step": 6705 }, { "epoch": 0.04, "learning_rate": 9.968556976544776e-05, "loss": 1.8274, "step": 6710 }, { "epoch": 0.04, "learning_rate": 9.968509439697158e-05, "loss": 1.8365, "step": 6715 }, { "epoch": 0.04, "learning_rate": 9.968461867056118e-05, "loss": 1.7324, "step": 6720 }, { "epoch": 0.04, "learning_rate": 9.968414258622002e-05, "loss": 1.868, "step": 6725 }, { "epoch": 0.04, "learning_rate": 9.968366614395148e-05, "loss": 1.7504, "step": 6730 }, { "epoch": 0.04, "learning_rate": 9.968318934375903e-05, "loss": 1.754, "step": 6735 }, { "epoch": 0.04, "learning_rate": 9.968271218564608e-05, "loss": 1.7909, "step": 6740 }, { "epoch": 0.04, "learning_rate": 9.968223466961611e-05, "loss": 1.7736, "step": 6745 }, { "epoch": 0.04, "learning_rate": 9.968175679567251e-05, "loss": 1.7449, "step": 6750 }, { "epoch": 0.04, "learning_rate": 9.968127856381875e-05, "loss": 1.796, "step": 6755 }, { "epoch": 0.04, "learning_rate": 9.968079997405826e-05, "loss": 1.7958, "step": 6760 }, { "epoch": 0.04, "learning_rate": 9.968032102639451e-05, "loss": 1.826, "step": 6765 }, { "epoch": 0.04, "learning_rate": 9.967984172083093e-05, "loss": 1.8029, "step": 6770 }, { "epoch": 0.04, "learning_rate": 9.967936205737096e-05, "loss": 1.7536, "step": 6775 }, { "epoch": 0.04, "learning_rate": 9.96788820360181e-05, "loss": 1.8385, "step": 6780 }, { "epoch": 0.04, "learning_rate": 9.967840165677578e-05, "loss": 1.7794, "step": 6785 }, { "epoch": 0.04, "learning_rate": 9.967792091964747e-05, "loss": 1.8021, "step": 6790 }, { "epoch": 0.04, "learning_rate": 9.967743982463659e-05, "loss": 1.8341, "step": 6795 }, { "epoch": 0.04, "learning_rate": 9.96769583717467e-05, "loss": 1.7747, "step": 6800 }, { "epoch": 0.04, "learning_rate": 9.967647656098117e-05, "loss": 1.8597, "step": 6805 }, { "epoch": 0.04, "learning_rate": 9.967599439234352e-05, "loss": 1.7625, "step": 6810 }, { "epoch": 0.04, "learning_rate": 9.967551186583723e-05, "loss": 1.774, "step": 6815 }, { "epoch": 0.04, "learning_rate": 9.967502898146574e-05, "loss": 1.796, "step": 6820 }, { "epoch": 0.04, "learning_rate": 9.967454573923257e-05, "loss": 1.8046, "step": 6825 }, { "epoch": 0.04, "learning_rate": 9.967406213914117e-05, "loss": 1.7483, "step": 6830 }, { "epoch": 0.04, "learning_rate": 9.967357818119505e-05, "loss": 1.8443, "step": 6835 }, { "epoch": 0.04, "learning_rate": 9.967309386539767e-05, "loss": 1.7954, "step": 6840 }, { "epoch": 0.04, "learning_rate": 9.967260919175253e-05, "loss": 1.7721, "step": 6845 }, { "epoch": 0.04, "learning_rate": 9.967212416026313e-05, "loss": 1.8179, "step": 6850 }, { "epoch": 0.04, "learning_rate": 9.967163877093295e-05, "loss": 1.7554, "step": 6855 }, { "epoch": 0.04, "learning_rate": 9.967115302376551e-05, "loss": 1.8216, "step": 6860 }, { "epoch": 0.04, "learning_rate": 9.967066691876427e-05, "loss": 1.7558, "step": 6865 }, { "epoch": 0.04, "learning_rate": 9.967018045593278e-05, "loss": 1.7602, "step": 6870 }, { "epoch": 0.04, "learning_rate": 9.96696936352745e-05, "loss": 1.7995, "step": 6875 }, { "epoch": 0.04, "learning_rate": 9.966920645679296e-05, "loss": 1.8228, "step": 6880 }, { "epoch": 0.04, "learning_rate": 9.966871892049168e-05, "loss": 1.7531, "step": 6885 }, { "epoch": 0.04, "learning_rate": 9.966823102637415e-05, "loss": 1.7706, "step": 6890 }, { "epoch": 0.04, "learning_rate": 9.96677427744439e-05, "loss": 1.7477, "step": 6895 }, { "epoch": 0.04, "learning_rate": 9.966725416470444e-05, "loss": 1.7887, "step": 6900 }, { "epoch": 0.04, "learning_rate": 9.966676519715928e-05, "loss": 1.7006, "step": 6905 }, { "epoch": 0.04, "learning_rate": 9.966627587181197e-05, "loss": 1.791, "step": 6910 }, { "epoch": 0.04, "learning_rate": 9.9665786188666e-05, "loss": 1.7512, "step": 6915 }, { "epoch": 0.04, "learning_rate": 9.966529614772493e-05, "loss": 1.7652, "step": 6920 }, { "epoch": 0.04, "learning_rate": 9.966480574899227e-05, "loss": 1.752, "step": 6925 }, { "epoch": 0.04, "learning_rate": 9.966431499247156e-05, "loss": 1.7817, "step": 6930 }, { "epoch": 0.04, "learning_rate": 9.966382387816633e-05, "loss": 1.8005, "step": 6935 }, { "epoch": 0.04, "learning_rate": 9.966333240608013e-05, "loss": 1.833, "step": 6940 }, { "epoch": 0.04, "learning_rate": 9.96628405762165e-05, "loss": 1.6925, "step": 6945 }, { "epoch": 0.04, "learning_rate": 9.966234838857898e-05, "loss": 1.8298, "step": 6950 }, { "epoch": 0.04, "learning_rate": 9.966185584317109e-05, "loss": 1.7927, "step": 6955 }, { "epoch": 0.04, "learning_rate": 9.966136293999642e-05, "loss": 1.7796, "step": 6960 }, { "epoch": 0.04, "learning_rate": 9.966086967905848e-05, "loss": 1.742, "step": 6965 }, { "epoch": 0.04, "learning_rate": 9.966037606036088e-05, "loss": 1.7021, "step": 6970 }, { "epoch": 0.04, "learning_rate": 9.965988208390711e-05, "loss": 1.808, "step": 6975 }, { "epoch": 0.04, "learning_rate": 9.965938774970077e-05, "loss": 1.7996, "step": 6980 }, { "epoch": 0.04, "learning_rate": 9.965889305774542e-05, "loss": 1.7214, "step": 6985 }, { "epoch": 0.04, "learning_rate": 9.96583980080446e-05, "loss": 1.7843, "step": 6990 }, { "epoch": 0.04, "learning_rate": 9.965790260060191e-05, "loss": 1.7213, "step": 6995 }, { "epoch": 0.04, "learning_rate": 9.965740683542089e-05, "loss": 1.7491, "step": 7000 }, { "epoch": 0.04, "learning_rate": 9.965691071250512e-05, "loss": 1.8405, "step": 7005 }, { "epoch": 0.04, "learning_rate": 9.965641423185816e-05, "loss": 1.8097, "step": 7010 }, { "epoch": 0.04, "learning_rate": 9.965591739348363e-05, "loss": 1.8049, "step": 7015 }, { "epoch": 0.04, "learning_rate": 9.965542019738507e-05, "loss": 1.7352, "step": 7020 }, { "epoch": 0.04, "learning_rate": 9.965492264356607e-05, "loss": 1.83, "step": 7025 }, { "epoch": 0.04, "learning_rate": 9.965442473203023e-05, "loss": 1.7478, "step": 7030 }, { "epoch": 0.04, "learning_rate": 9.965392646278112e-05, "loss": 1.7715, "step": 7035 }, { "epoch": 0.04, "learning_rate": 9.965342783582233e-05, "loss": 1.8367, "step": 7040 }, { "epoch": 0.04, "learning_rate": 9.965292885115746e-05, "loss": 1.8677, "step": 7045 }, { "epoch": 0.04, "learning_rate": 9.965242950879011e-05, "loss": 1.6774, "step": 7050 }, { "epoch": 0.04, "learning_rate": 9.965192980872384e-05, "loss": 1.7499, "step": 7055 }, { "epoch": 0.04, "learning_rate": 9.965142975096231e-05, "loss": 1.8038, "step": 7060 }, { "epoch": 0.04, "learning_rate": 9.965092933550908e-05, "loss": 1.7737, "step": 7065 }, { "epoch": 0.04, "learning_rate": 9.965042856236776e-05, "loss": 1.7764, "step": 7070 }, { "epoch": 0.04, "learning_rate": 9.964992743154197e-05, "loss": 1.8019, "step": 7075 }, { "epoch": 0.04, "learning_rate": 9.964942594303531e-05, "loss": 1.8089, "step": 7080 }, { "epoch": 0.04, "learning_rate": 9.96489240968514e-05, "loss": 1.7894, "step": 7085 }, { "epoch": 0.04, "learning_rate": 9.964842189299385e-05, "loss": 1.7122, "step": 7090 }, { "epoch": 0.04, "learning_rate": 9.964791933146629e-05, "loss": 1.8079, "step": 7095 }, { "epoch": 0.04, "learning_rate": 9.964741641227232e-05, "loss": 1.7622, "step": 7100 }, { "epoch": 0.04, "learning_rate": 9.964691313541556e-05, "loss": 1.8285, "step": 7105 }, { "epoch": 0.04, "learning_rate": 9.964640950089968e-05, "loss": 1.7972, "step": 7110 }, { "epoch": 0.04, "learning_rate": 9.964590550872826e-05, "loss": 1.7123, "step": 7115 }, { "epoch": 0.04, "learning_rate": 9.964540115890494e-05, "loss": 1.8539, "step": 7120 }, { "epoch": 0.04, "learning_rate": 9.964489645143337e-05, "loss": 1.7747, "step": 7125 }, { "epoch": 0.04, "learning_rate": 9.964439138631718e-05, "loss": 1.7461, "step": 7130 }, { "epoch": 0.04, "learning_rate": 9.964388596356e-05, "loss": 1.7706, "step": 7135 }, { "epoch": 0.04, "learning_rate": 9.964338018316547e-05, "loss": 1.7716, "step": 7140 }, { "epoch": 0.04, "learning_rate": 9.964287404513726e-05, "loss": 1.7769, "step": 7145 }, { "epoch": 0.04, "learning_rate": 9.964236754947898e-05, "loss": 1.7949, "step": 7150 }, { "epoch": 0.04, "learning_rate": 9.96418606961943e-05, "loss": 1.8069, "step": 7155 }, { "epoch": 0.04, "learning_rate": 9.964135348528688e-05, "loss": 1.7693, "step": 7160 }, { "epoch": 0.04, "learning_rate": 9.964084591676035e-05, "loss": 1.7794, "step": 7165 }, { "epoch": 0.04, "learning_rate": 9.964033799061838e-05, "loss": 1.7714, "step": 7170 }, { "epoch": 0.04, "learning_rate": 9.963982970686461e-05, "loss": 1.7589, "step": 7175 }, { "epoch": 0.04, "learning_rate": 9.963932106550274e-05, "loss": 1.8282, "step": 7180 }, { "epoch": 0.04, "learning_rate": 9.963881206653641e-05, "loss": 1.7518, "step": 7185 }, { "epoch": 0.04, "learning_rate": 9.963830270996928e-05, "loss": 1.6864, "step": 7190 }, { "epoch": 0.04, "learning_rate": 9.963779299580504e-05, "loss": 1.8705, "step": 7195 }, { "epoch": 0.04, "learning_rate": 9.963728292404734e-05, "loss": 1.8075, "step": 7200 }, { "epoch": 0.04, "learning_rate": 9.963677249469987e-05, "loss": 1.7915, "step": 7205 }, { "epoch": 0.04, "learning_rate": 9.96362617077663e-05, "loss": 1.6847, "step": 7210 }, { "epoch": 0.04, "learning_rate": 9.963575056325032e-05, "loss": 1.7956, "step": 7215 }, { "epoch": 0.04, "learning_rate": 9.96352390611556e-05, "loss": 1.7151, "step": 7220 }, { "epoch": 0.04, "learning_rate": 9.963472720148584e-05, "loss": 1.8053, "step": 7225 }, { "epoch": 0.04, "learning_rate": 9.96342149842447e-05, "loss": 1.7862, "step": 7230 }, { "epoch": 0.04, "learning_rate": 9.96337024094359e-05, "loss": 1.7785, "step": 7235 }, { "epoch": 0.04, "learning_rate": 9.963318947706311e-05, "loss": 1.7511, "step": 7240 }, { "epoch": 0.04, "learning_rate": 9.963267618713004e-05, "loss": 1.7367, "step": 7245 }, { "epoch": 0.04, "learning_rate": 9.963216253964038e-05, "loss": 1.7748, "step": 7250 }, { "epoch": 0.04, "learning_rate": 9.963164853459783e-05, "loss": 1.7679, "step": 7255 }, { "epoch": 0.04, "learning_rate": 9.963113417200612e-05, "loss": 1.7698, "step": 7260 }, { "epoch": 0.04, "learning_rate": 9.96306194518689e-05, "loss": 1.7953, "step": 7265 }, { "epoch": 0.04, "learning_rate": 9.963010437418992e-05, "loss": 1.7908, "step": 7270 }, { "epoch": 0.04, "learning_rate": 9.96295889389729e-05, "loss": 1.7358, "step": 7275 }, { "epoch": 0.04, "learning_rate": 9.962907314622149e-05, "loss": 1.8492, "step": 7280 }, { "epoch": 0.04, "learning_rate": 9.962855699593949e-05, "loss": 1.7579, "step": 7285 }, { "epoch": 0.04, "learning_rate": 9.962804048813056e-05, "loss": 1.783, "step": 7290 }, { "epoch": 0.04, "learning_rate": 9.962752362279844e-05, "loss": 1.8077, "step": 7295 }, { "epoch": 0.04, "learning_rate": 9.962700639994686e-05, "loss": 1.7362, "step": 7300 }, { "epoch": 0.04, "learning_rate": 9.962648881957952e-05, "loss": 1.7168, "step": 7305 }, { "epoch": 0.04, "learning_rate": 9.962597088170019e-05, "loss": 1.6584, "step": 7310 }, { "epoch": 0.04, "learning_rate": 9.962545258631257e-05, "loss": 1.8249, "step": 7315 }, { "epoch": 0.04, "learning_rate": 9.96249339334204e-05, "loss": 1.7559, "step": 7320 }, { "epoch": 0.04, "learning_rate": 9.962441492302741e-05, "loss": 1.8164, "step": 7325 }, { "epoch": 0.04, "learning_rate": 9.962389555513736e-05, "loss": 1.7097, "step": 7330 }, { "epoch": 0.04, "learning_rate": 9.962337582975396e-05, "loss": 1.7029, "step": 7335 }, { "epoch": 0.04, "learning_rate": 9.962285574688099e-05, "loss": 1.7805, "step": 7340 }, { "epoch": 0.04, "learning_rate": 9.962233530652219e-05, "loss": 1.8183, "step": 7345 }, { "epoch": 0.04, "learning_rate": 9.962181450868127e-05, "loss": 1.8264, "step": 7350 }, { "epoch": 0.04, "learning_rate": 9.962129335336203e-05, "loss": 1.8483, "step": 7355 }, { "epoch": 0.04, "learning_rate": 9.962077184056821e-05, "loss": 1.7853, "step": 7360 }, { "epoch": 0.04, "learning_rate": 9.962024997030354e-05, "loss": 1.7446, "step": 7365 }, { "epoch": 0.04, "learning_rate": 9.961972774257182e-05, "loss": 1.7869, "step": 7370 }, { "epoch": 0.04, "learning_rate": 9.961920515737679e-05, "loss": 1.8354, "step": 7375 }, { "epoch": 0.04, "learning_rate": 9.961868221472221e-05, "loss": 1.7844, "step": 7380 }, { "epoch": 0.04, "learning_rate": 9.961815891461188e-05, "loss": 1.8115, "step": 7385 }, { "epoch": 0.04, "learning_rate": 9.961763525704952e-05, "loss": 1.6813, "step": 7390 }, { "epoch": 0.04, "learning_rate": 9.961711124203894e-05, "loss": 1.7244, "step": 7395 }, { "epoch": 0.04, "learning_rate": 9.961658686958391e-05, "loss": 1.849, "step": 7400 }, { "epoch": 0.04, "learning_rate": 9.961606213968819e-05, "loss": 1.773, "step": 7405 }, { "epoch": 0.04, "learning_rate": 9.961553705235558e-05, "loss": 1.7448, "step": 7410 }, { "epoch": 0.04, "learning_rate": 9.961501160758985e-05, "loss": 1.8068, "step": 7415 }, { "epoch": 0.04, "learning_rate": 9.961448580539478e-05, "loss": 1.692, "step": 7420 }, { "epoch": 0.04, "learning_rate": 9.961395964577418e-05, "loss": 1.6781, "step": 7425 }, { "epoch": 0.04, "learning_rate": 9.961343312873181e-05, "loss": 1.7289, "step": 7430 }, { "epoch": 0.04, "learning_rate": 9.96129062542715e-05, "loss": 1.7352, "step": 7435 }, { "epoch": 0.04, "learning_rate": 9.961237902239703e-05, "loss": 1.7754, "step": 7440 }, { "epoch": 0.04, "learning_rate": 9.961185143311218e-05, "loss": 1.8255, "step": 7445 }, { "epoch": 0.04, "learning_rate": 9.961132348642077e-05, "loss": 1.7752, "step": 7450 }, { "epoch": 0.04, "learning_rate": 9.96107951823266e-05, "loss": 1.7332, "step": 7455 }, { "epoch": 0.04, "learning_rate": 9.961026652083347e-05, "loss": 1.7394, "step": 7460 }, { "epoch": 0.04, "learning_rate": 9.96097375019452e-05, "loss": 1.6787, "step": 7465 }, { "epoch": 0.04, "learning_rate": 9.960920812566559e-05, "loss": 1.7188, "step": 7470 }, { "epoch": 0.04, "learning_rate": 9.960867839199847e-05, "loss": 1.7543, "step": 7475 }, { "epoch": 0.04, "learning_rate": 9.960814830094763e-05, "loss": 1.7508, "step": 7480 }, { "epoch": 0.04, "learning_rate": 9.960761785251691e-05, "loss": 1.7763, "step": 7485 }, { "epoch": 0.04, "learning_rate": 9.960708704671011e-05, "loss": 1.7705, "step": 7490 }, { "epoch": 0.04, "learning_rate": 9.96065558835311e-05, "loss": 1.7933, "step": 7495 }, { "epoch": 0.04, "learning_rate": 9.960602436298364e-05, "loss": 1.7894, "step": 7500 }, { "epoch": 0.04, "learning_rate": 9.960549248507161e-05, "loss": 1.7459, "step": 7505 }, { "epoch": 0.04, "learning_rate": 9.960496024979881e-05, "loss": 1.7711, "step": 7510 }, { "epoch": 0.04, "learning_rate": 9.960442765716912e-05, "loss": 1.7482, "step": 7515 }, { "epoch": 0.04, "learning_rate": 9.960389470718632e-05, "loss": 1.8137, "step": 7520 }, { "epoch": 0.04, "learning_rate": 9.960336139985429e-05, "loss": 1.8019, "step": 7525 }, { "epoch": 0.04, "learning_rate": 9.960282773517687e-05, "loss": 1.7664, "step": 7530 }, { "epoch": 0.04, "learning_rate": 9.960229371315787e-05, "loss": 1.7736, "step": 7535 }, { "epoch": 0.04, "learning_rate": 9.960175933380116e-05, "loss": 1.7913, "step": 7540 }, { "epoch": 0.04, "learning_rate": 9.960122459711061e-05, "loss": 1.7905, "step": 7545 }, { "epoch": 0.04, "learning_rate": 9.960068950309002e-05, "loss": 1.8095, "step": 7550 }, { "epoch": 0.04, "learning_rate": 9.96001540517433e-05, "loss": 1.7637, "step": 7555 }, { "epoch": 0.04, "learning_rate": 9.95996182430743e-05, "loss": 1.834, "step": 7560 }, { "epoch": 0.04, "learning_rate": 9.959908207708683e-05, "loss": 1.8095, "step": 7565 }, { "epoch": 0.04, "learning_rate": 9.959854555378481e-05, "loss": 1.7832, "step": 7570 }, { "epoch": 0.04, "learning_rate": 9.959800867317206e-05, "loss": 1.7898, "step": 7575 }, { "epoch": 0.04, "learning_rate": 9.959747143525249e-05, "loss": 1.8225, "step": 7580 }, { "epoch": 0.04, "learning_rate": 9.959693384002992e-05, "loss": 1.8185, "step": 7585 }, { "epoch": 0.04, "learning_rate": 9.959639588750828e-05, "loss": 1.7434, "step": 7590 }, { "epoch": 0.04, "learning_rate": 9.959585757769142e-05, "loss": 1.7911, "step": 7595 }, { "epoch": 0.04, "learning_rate": 9.95953189105832e-05, "loss": 1.7566, "step": 7600 }, { "epoch": 0.04, "learning_rate": 9.959477988618753e-05, "loss": 1.7717, "step": 7605 }, { "epoch": 0.04, "learning_rate": 9.959424050450827e-05, "loss": 1.7854, "step": 7610 }, { "epoch": 0.04, "learning_rate": 9.959370076554933e-05, "loss": 1.7302, "step": 7615 }, { "epoch": 0.04, "learning_rate": 9.959316066931457e-05, "loss": 1.7952, "step": 7620 }, { "epoch": 0.04, "learning_rate": 9.95926202158079e-05, "loss": 1.7887, "step": 7625 }, { "epoch": 0.04, "learning_rate": 9.959207940503321e-05, "loss": 1.8322, "step": 7630 }, { "epoch": 0.04, "learning_rate": 9.959153823699439e-05, "loss": 1.7874, "step": 7635 }, { "epoch": 0.04, "learning_rate": 9.959099671169534e-05, "loss": 1.7712, "step": 7640 }, { "epoch": 0.04, "learning_rate": 9.959045482913997e-05, "loss": 1.7292, "step": 7645 }, { "epoch": 0.04, "learning_rate": 9.958991258933217e-05, "loss": 1.6971, "step": 7650 }, { "epoch": 0.04, "learning_rate": 9.958936999227586e-05, "loss": 1.7795, "step": 7655 }, { "epoch": 0.04, "learning_rate": 9.958882703797494e-05, "loss": 1.729, "step": 7660 }, { "epoch": 0.04, "learning_rate": 9.958828372643333e-05, "loss": 1.7684, "step": 7665 }, { "epoch": 0.04, "learning_rate": 9.958774005765495e-05, "loss": 1.7359, "step": 7670 }, { "epoch": 0.04, "learning_rate": 9.958719603164368e-05, "loss": 1.7844, "step": 7675 }, { "epoch": 0.04, "learning_rate": 9.958665164840347e-05, "loss": 1.7206, "step": 7680 }, { "epoch": 0.04, "learning_rate": 9.958610690793825e-05, "loss": 1.7484, "step": 7685 }, { "epoch": 0.04, "learning_rate": 9.958556181025191e-05, "loss": 1.7593, "step": 7690 }, { "epoch": 0.04, "learning_rate": 9.958501635534842e-05, "loss": 1.7326, "step": 7695 }, { "epoch": 0.04, "learning_rate": 9.958447054323167e-05, "loss": 1.7532, "step": 7700 }, { "epoch": 0.04, "learning_rate": 9.958392437390562e-05, "loss": 1.7275, "step": 7705 }, { "epoch": 0.04, "learning_rate": 9.95833778473742e-05, "loss": 1.7973, "step": 7710 }, { "epoch": 0.04, "learning_rate": 9.958283096364132e-05, "loss": 1.7357, "step": 7715 }, { "epoch": 0.04, "learning_rate": 9.958228372271094e-05, "loss": 1.8213, "step": 7720 }, { "epoch": 0.04, "learning_rate": 9.958173612458701e-05, "loss": 1.7306, "step": 7725 }, { "epoch": 0.04, "learning_rate": 9.958118816927348e-05, "loss": 1.8158, "step": 7730 }, { "epoch": 0.04, "learning_rate": 9.958063985677427e-05, "loss": 1.782, "step": 7735 }, { "epoch": 0.04, "learning_rate": 9.958009118709335e-05, "loss": 1.7719, "step": 7740 }, { "epoch": 0.04, "learning_rate": 9.957954216023468e-05, "loss": 1.7579, "step": 7745 }, { "epoch": 0.04, "learning_rate": 9.957899277620219e-05, "loss": 1.6986, "step": 7750 }, { "epoch": 0.04, "learning_rate": 9.957844303499984e-05, "loss": 1.7566, "step": 7755 }, { "epoch": 0.04, "learning_rate": 9.957789293663162e-05, "loss": 1.7639, "step": 7760 }, { "epoch": 0.04, "learning_rate": 9.957734248110146e-05, "loss": 1.7804, "step": 7765 }, { "epoch": 0.04, "learning_rate": 9.957679166841336e-05, "loss": 1.7815, "step": 7770 }, { "epoch": 0.04, "learning_rate": 9.957624049857126e-05, "loss": 1.7527, "step": 7775 }, { "epoch": 0.04, "learning_rate": 9.957568897157914e-05, "loss": 1.7564, "step": 7780 }, { "epoch": 0.04, "learning_rate": 9.957513708744096e-05, "loss": 1.7251, "step": 7785 }, { "epoch": 0.04, "learning_rate": 9.957458484616071e-05, "loss": 1.7061, "step": 7790 }, { "epoch": 0.04, "learning_rate": 9.957403224774238e-05, "loss": 1.7757, "step": 7795 }, { "epoch": 0.04, "learning_rate": 9.957347929218992e-05, "loss": 1.7877, "step": 7800 }, { "epoch": 0.04, "learning_rate": 9.957292597950733e-05, "loss": 1.7528, "step": 7805 }, { "epoch": 0.04, "learning_rate": 9.957237230969863e-05, "loss": 1.7359, "step": 7810 }, { "epoch": 0.04, "learning_rate": 9.957181828276774e-05, "loss": 1.724, "step": 7815 }, { "epoch": 0.04, "learning_rate": 9.957126389871869e-05, "loss": 1.7732, "step": 7820 }, { "epoch": 0.04, "learning_rate": 9.957070915755548e-05, "loss": 1.8105, "step": 7825 }, { "epoch": 0.04, "learning_rate": 9.957015405928209e-05, "loss": 1.7441, "step": 7830 }, { "epoch": 0.04, "learning_rate": 9.956959860390254e-05, "loss": 1.7271, "step": 7835 }, { "epoch": 0.04, "learning_rate": 9.95690427914208e-05, "loss": 1.7526, "step": 7840 }, { "epoch": 0.04, "learning_rate": 9.95684866218409e-05, "loss": 1.8165, "step": 7845 }, { "epoch": 0.04, "learning_rate": 9.956793009516684e-05, "loss": 1.7586, "step": 7850 }, { "epoch": 0.04, "learning_rate": 9.956737321140261e-05, "loss": 1.7711, "step": 7855 }, { "epoch": 0.04, "learning_rate": 9.956681597055226e-05, "loss": 1.8076, "step": 7860 }, { "epoch": 0.04, "learning_rate": 9.956625837261978e-05, "loss": 1.7939, "step": 7865 }, { "epoch": 0.04, "learning_rate": 9.956570041760919e-05, "loss": 1.7446, "step": 7870 }, { "epoch": 0.04, "learning_rate": 9.95651421055245e-05, "loss": 1.7868, "step": 7875 }, { "epoch": 0.04, "learning_rate": 9.956458343636974e-05, "loss": 1.7573, "step": 7880 }, { "epoch": 0.04, "learning_rate": 9.956402441014896e-05, "loss": 1.7288, "step": 7885 }, { "epoch": 0.04, "learning_rate": 9.956346502686614e-05, "loss": 1.8132, "step": 7890 }, { "epoch": 0.04, "learning_rate": 9.956290528652534e-05, "loss": 1.756, "step": 7895 }, { "epoch": 0.04, "learning_rate": 9.95623451891306e-05, "loss": 1.8156, "step": 7900 }, { "epoch": 0.04, "learning_rate": 9.956178473468593e-05, "loss": 1.8226, "step": 7905 }, { "epoch": 0.04, "learning_rate": 9.956122392319538e-05, "loss": 1.7779, "step": 7910 }, { "epoch": 0.04, "learning_rate": 9.956066275466299e-05, "loss": 1.7922, "step": 7915 }, { "epoch": 0.04, "learning_rate": 9.956010122909278e-05, "loss": 1.7432, "step": 7920 }, { "epoch": 0.04, "learning_rate": 9.955953934648886e-05, "loss": 1.8373, "step": 7925 }, { "epoch": 0.04, "learning_rate": 9.95589771068552e-05, "loss": 1.8096, "step": 7930 }, { "epoch": 0.04, "learning_rate": 9.95584145101959e-05, "loss": 1.689, "step": 7935 }, { "epoch": 0.04, "learning_rate": 9.955785155651498e-05, "loss": 1.7575, "step": 7940 }, { "epoch": 0.04, "learning_rate": 9.955728824581653e-05, "loss": 1.7781, "step": 7945 }, { "epoch": 0.04, "learning_rate": 9.955672457810458e-05, "loss": 1.7442, "step": 7950 }, { "epoch": 0.04, "learning_rate": 9.955616055338323e-05, "loss": 1.794, "step": 7955 }, { "epoch": 0.04, "learning_rate": 9.955559617165648e-05, "loss": 1.8175, "step": 7960 }, { "epoch": 0.04, "learning_rate": 9.955503143292846e-05, "loss": 1.7136, "step": 7965 }, { "epoch": 0.04, "learning_rate": 9.95544663372032e-05, "loss": 1.7505, "step": 7970 }, { "epoch": 0.04, "learning_rate": 9.955390088448477e-05, "loss": 1.787, "step": 7975 }, { "epoch": 0.04, "learning_rate": 9.955333507477726e-05, "loss": 1.7492, "step": 7980 }, { "epoch": 0.04, "learning_rate": 9.955276890808476e-05, "loss": 1.6396, "step": 7985 }, { "epoch": 0.04, "learning_rate": 9.955220238441132e-05, "loss": 1.7894, "step": 7990 }, { "epoch": 0.04, "learning_rate": 9.955163550376101e-05, "loss": 1.7396, "step": 7995 }, { "epoch": 0.04, "learning_rate": 9.955106826613796e-05, "loss": 1.7773, "step": 8000 } ], "logging_steps": 5, "max_steps": 185168, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 2.122051858150195e+19, "trial_name": null, "trial_params": null }