{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.11996925787766885, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 1.6246, "step": 5 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 1.6269, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 1.6391, "step": 15 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 1.6301, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.5e-05, "loss": 1.6152, "step": 25 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 1.5995, "step": 30 }, { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 1.5926, "step": 35 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.5786, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.5e-05, "loss": 1.5428, "step": 45 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.5604, "step": 50 }, { "epoch": 0.0, "learning_rate": 5.500000000000001e-05, "loss": 1.5428, "step": 55 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 1.5432, "step": 60 }, { "epoch": 0.0, "learning_rate": 6.500000000000001e-05, "loss": 1.5311, "step": 65 }, { "epoch": 0.0, "learning_rate": 7e-05, "loss": 1.5128, "step": 70 }, { "epoch": 0.0, "learning_rate": 7.500000000000001e-05, "loss": 1.4994, "step": 75 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 1.4914, "step": 80 }, { "epoch": 0.0, "learning_rate": 8.5e-05, "loss": 1.4734, "step": 85 }, { "epoch": 0.0, "learning_rate": 9e-05, "loss": 1.4776, "step": 90 }, { "epoch": 0.0, "learning_rate": 9.5e-05, "loss": 1.4431, "step": 95 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 1.4292, "step": 100 }, { "epoch": 0.0, "learning_rate": 9.999999860859665e-05, "loss": 1.4445, "step": 105 }, { "epoch": 0.0, "learning_rate": 9.99999944343867e-05, "loss": 1.4319, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.999998747737038e-05, "loss": 1.4423, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.999997773754806e-05, "loss": 1.4234, "step": 120 }, { "epoch": 0.0, "learning_rate": 9.99999652149203e-05, "loss": 1.4259, "step": 125 }, { "epoch": 0.0, "learning_rate": 9.999994990948778e-05, "loss": 1.4311, "step": 130 }, { "epoch": 0.0, "learning_rate": 9.999993182125136e-05, "loss": 1.4231, "step": 135 }, { "epoch": 0.0, "learning_rate": 9.999991095021205e-05, "loss": 1.4206, "step": 140 }, { "epoch": 0.0, "learning_rate": 9.999988729637101e-05, "loss": 1.3838, "step": 145 }, { "epoch": 0.0, "learning_rate": 9.999986085972956e-05, "loss": 1.3876, "step": 150 }, { "epoch": 0.0, "learning_rate": 9.999983164028916e-05, "loss": 1.4049, "step": 155 }, { "epoch": 0.0, "learning_rate": 9.999979963805146e-05, "loss": 1.3645, "step": 160 }, { "epoch": 0.0, "learning_rate": 9.99997648530182e-05, "loss": 1.3667, "step": 165 }, { "epoch": 0.0, "learning_rate": 9.999972728519136e-05, "loss": 1.3844, "step": 170 }, { "epoch": 0.0, "learning_rate": 9.9999686934573e-05, "loss": 1.3697, "step": 175 }, { "epoch": 0.0, "learning_rate": 9.999964380116538e-05, "loss": 1.3499, "step": 180 }, { "epoch": 0.0, "learning_rate": 9.999959788497089e-05, "loss": 1.3791, "step": 185 }, { "epoch": 0.0, "learning_rate": 9.999954918599211e-05, "loss": 1.3394, "step": 190 }, { "epoch": 0.0, "learning_rate": 9.999949770423173e-05, "loss": 1.3431, "step": 195 }, { "epoch": 0.0, "learning_rate": 9.99994434396926e-05, "loss": 1.3691, "step": 200 }, { "epoch": 0.0, "learning_rate": 9.999938639237779e-05, "loss": 1.3418, "step": 205 }, { "epoch": 0.0, "learning_rate": 9.999932656229044e-05, "loss": 1.3525, "step": 210 }, { "epoch": 0.0, "learning_rate": 9.999926394943386e-05, "loss": 1.3563, "step": 215 }, { "epoch": 0.0, "learning_rate": 9.999919855381156e-05, "loss": 1.353, "step": 220 }, { "epoch": 0.0, "learning_rate": 9.999913037542721e-05, "loss": 1.3144, "step": 225 }, { "epoch": 0.0, "learning_rate": 9.999905941428454e-05, "loss": 1.3499, "step": 230 }, { "epoch": 0.0, "learning_rate": 9.999898567038755e-05, "loss": 1.3294, "step": 235 }, { "epoch": 0.0, "learning_rate": 9.999890914374033e-05, "loss": 1.3265, "step": 240 }, { "epoch": 0.0, "learning_rate": 9.999882983434713e-05, "loss": 1.3102, "step": 245 }, { "epoch": 0.0, "learning_rate": 9.999874774221237e-05, "loss": 1.313, "step": 250 }, { "epoch": 0.0, "learning_rate": 9.999866286734062e-05, "loss": 1.3232, "step": 255 }, { "epoch": 0.0, "learning_rate": 9.99985752097366e-05, "loss": 1.3014, "step": 260 }, { "epoch": 0.0, "learning_rate": 9.99984847694052e-05, "loss": 1.311, "step": 265 }, { "epoch": 0.0, "learning_rate": 9.999839154635143e-05, "loss": 1.3241, "step": 270 }, { "epoch": 0.0, "learning_rate": 9.999829554058051e-05, "loss": 1.3248, "step": 275 }, { "epoch": 0.0, "learning_rate": 9.999819675209776e-05, "loss": 1.3087, "step": 280 }, { "epoch": 0.0, "learning_rate": 9.999809518090867e-05, "loss": 1.3014, "step": 285 }, { "epoch": 0.0, "learning_rate": 9.999799082701893e-05, "loss": 1.3289, "step": 290 }, { "epoch": 0.0, "learning_rate": 9.999788369043433e-05, "loss": 1.3138, "step": 295 }, { "epoch": 0.0, "learning_rate": 9.999777377116082e-05, "loss": 1.2992, "step": 300 }, { "epoch": 0.0, "learning_rate": 9.999766106920452e-05, "loss": 1.2954, "step": 305 }, { "epoch": 0.0, "learning_rate": 9.99975455845717e-05, "loss": 1.3154, "step": 310 }, { "epoch": 0.0, "learning_rate": 9.999742731726882e-05, "loss": 1.287, "step": 315 }, { "epoch": 0.0, "learning_rate": 9.999730626730242e-05, "loss": 1.3067, "step": 320 }, { "epoch": 0.0, "learning_rate": 9.999718243467927e-05, "loss": 1.3134, "step": 325 }, { "epoch": 0.0, "learning_rate": 9.999705581940624e-05, "loss": 1.3038, "step": 330 }, { "epoch": 0.01, "learning_rate": 9.99969264214904e-05, "loss": 1.3084, "step": 335 }, { "epoch": 0.01, "learning_rate": 9.999679424093892e-05, "loss": 1.2856, "step": 340 }, { "epoch": 0.01, "learning_rate": 9.999665927775917e-05, "loss": 1.2793, "step": 345 }, { "epoch": 0.01, "learning_rate": 9.999652153195869e-05, "loss": 1.2845, "step": 350 }, { "epoch": 0.01, "learning_rate": 9.999638100354509e-05, "loss": 1.2857, "step": 355 }, { "epoch": 0.01, "learning_rate": 9.999623769252623e-05, "loss": 1.2867, "step": 360 }, { "epoch": 0.01, "learning_rate": 9.999609159891011e-05, "loss": 1.2796, "step": 365 }, { "epoch": 0.01, "learning_rate": 9.99959427227048e-05, "loss": 1.2779, "step": 370 }, { "epoch": 0.01, "learning_rate": 9.999579106391861e-05, "loss": 1.2833, "step": 375 }, { "epoch": 0.01, "learning_rate": 9.999563662256e-05, "loss": 1.2804, "step": 380 }, { "epoch": 0.01, "learning_rate": 9.999547939863755e-05, "loss": 1.2705, "step": 385 }, { "epoch": 0.01, "learning_rate": 9.999531939216001e-05, "loss": 1.2764, "step": 390 }, { "epoch": 0.01, "learning_rate": 9.99951566031363e-05, "loss": 1.2666, "step": 395 }, { "epoch": 0.01, "learning_rate": 9.999499103157545e-05, "loss": 1.2571, "step": 400 }, { "epoch": 0.01, "learning_rate": 9.99948226774867e-05, "loss": 1.2646, "step": 405 }, { "epoch": 0.01, "learning_rate": 9.999465154087941e-05, "loss": 1.2436, "step": 410 }, { "epoch": 0.01, "learning_rate": 9.999447762176312e-05, "loss": 1.2548, "step": 415 }, { "epoch": 0.01, "learning_rate": 9.999430092014749e-05, "loss": 1.2607, "step": 420 }, { "epoch": 0.01, "learning_rate": 9.999412143604237e-05, "loss": 1.2279, "step": 425 }, { "epoch": 0.01, "learning_rate": 9.999393916945771e-05, "loss": 1.2484, "step": 430 }, { "epoch": 0.01, "learning_rate": 9.999375412040373e-05, "loss": 1.2771, "step": 435 }, { "epoch": 0.01, "learning_rate": 9.999356628889065e-05, "loss": 1.2444, "step": 440 }, { "epoch": 0.01, "learning_rate": 9.999337567492897e-05, "loss": 1.2431, "step": 445 }, { "epoch": 0.01, "learning_rate": 9.99931822785293e-05, "loss": 1.2807, "step": 450 }, { "epoch": 0.01, "learning_rate": 9.999298609970238e-05, "loss": 1.2479, "step": 455 }, { "epoch": 0.01, "learning_rate": 9.999278713845914e-05, "loss": 1.265, "step": 460 }, { "epoch": 0.01, "learning_rate": 9.999258539481064e-05, "loss": 1.2593, "step": 465 }, { "epoch": 0.01, "learning_rate": 9.999238086876814e-05, "loss": 1.2639, "step": 470 }, { "epoch": 0.01, "learning_rate": 9.9992173560343e-05, "loss": 1.2639, "step": 475 }, { "epoch": 0.01, "learning_rate": 9.999196346954676e-05, "loss": 1.2406, "step": 480 }, { "epoch": 0.01, "learning_rate": 9.999175059639112e-05, "loss": 1.2501, "step": 485 }, { "epoch": 0.01, "learning_rate": 9.999153494088793e-05, "loss": 1.256, "step": 490 }, { "epoch": 0.01, "learning_rate": 9.999131650304918e-05, "loss": 1.2583, "step": 495 }, { "epoch": 0.01, "learning_rate": 9.999109528288702e-05, "loss": 1.2572, "step": 500 }, { "epoch": 0.01, "learning_rate": 9.99908712804138e-05, "loss": 1.2479, "step": 505 }, { "epoch": 0.01, "learning_rate": 9.999064449564194e-05, "loss": 1.2546, "step": 510 }, { "epoch": 0.01, "learning_rate": 9.99904149285841e-05, "loss": 1.2495, "step": 515 }, { "epoch": 0.01, "learning_rate": 9.999018257925304e-05, "loss": 1.261, "step": 520 }, { "epoch": 0.01, "learning_rate": 9.998994744766167e-05, "loss": 1.2497, "step": 525 }, { "epoch": 0.01, "learning_rate": 9.998970953382312e-05, "loss": 1.2201, "step": 530 }, { "epoch": 0.01, "learning_rate": 9.99894688377506e-05, "loss": 1.2369, "step": 535 }, { "epoch": 0.01, "learning_rate": 9.998922535945753e-05, "loss": 1.2367, "step": 540 }, { "epoch": 0.01, "learning_rate": 9.998897909895743e-05, "loss": 1.2316, "step": 545 }, { "epoch": 0.01, "learning_rate": 9.998873005626405e-05, "loss": 1.2383, "step": 550 }, { "epoch": 0.01, "learning_rate": 9.998847823139122e-05, "loss": 1.1939, "step": 555 }, { "epoch": 0.01, "learning_rate": 9.998822362435294e-05, "loss": 1.2225, "step": 560 }, { "epoch": 0.01, "learning_rate": 9.998796623516342e-05, "loss": 1.2252, "step": 565 }, { "epoch": 0.01, "learning_rate": 9.998770606383696e-05, "loss": 1.2138, "step": 570 }, { "epoch": 0.01, "learning_rate": 9.998744311038804e-05, "loss": 1.2478, "step": 575 }, { "epoch": 0.01, "learning_rate": 9.99871773748313e-05, "loss": 1.2211, "step": 580 }, { "epoch": 0.01, "learning_rate": 9.998690885718153e-05, "loss": 1.2302, "step": 585 }, { "epoch": 0.01, "learning_rate": 9.99866375574537e-05, "loss": 1.2183, "step": 590 }, { "epoch": 0.01, "learning_rate": 9.998636347566286e-05, "loss": 1.1991, "step": 595 }, { "epoch": 0.01, "learning_rate": 9.99860866118243e-05, "loss": 1.2242, "step": 600 }, { "epoch": 0.01, "learning_rate": 9.998580696595341e-05, "loss": 1.2265, "step": 605 }, { "epoch": 0.01, "learning_rate": 9.998552453806577e-05, "loss": 1.1854, "step": 610 }, { "epoch": 0.01, "learning_rate": 9.99852393281771e-05, "loss": 1.2017, "step": 615 }, { "epoch": 0.01, "learning_rate": 9.998495133630326e-05, "loss": 1.2062, "step": 620 }, { "epoch": 0.01, "learning_rate": 9.998466056246028e-05, "loss": 1.2227, "step": 625 }, { "epoch": 0.01, "learning_rate": 9.998436700666434e-05, "loss": 1.2395, "step": 630 }, { "epoch": 0.01, "learning_rate": 9.998407066893179e-05, "loss": 1.2111, "step": 635 }, { "epoch": 0.01, "learning_rate": 9.998377154927912e-05, "loss": 1.2186, "step": 640 }, { "epoch": 0.01, "learning_rate": 9.998346964772297e-05, "loss": 1.2173, "step": 645 }, { "epoch": 0.01, "learning_rate": 9.998316496428015e-05, "loss": 1.2156, "step": 650 }, { "epoch": 0.01, "learning_rate": 9.998285749896763e-05, "loss": 1.1918, "step": 655 }, { "epoch": 0.01, "learning_rate": 9.99825472518025e-05, "loss": 1.1924, "step": 660 }, { "epoch": 0.01, "learning_rate": 9.998223422280203e-05, "loss": 1.2125, "step": 665 }, { "epoch": 0.01, "learning_rate": 9.998191841198366e-05, "loss": 1.2363, "step": 670 }, { "epoch": 0.01, "learning_rate": 9.998159981936494e-05, "loss": 1.1935, "step": 675 }, { "epoch": 0.01, "learning_rate": 9.998127844496362e-05, "loss": 1.2096, "step": 680 }, { "epoch": 0.01, "learning_rate": 9.998095428879759e-05, "loss": 1.1839, "step": 685 }, { "epoch": 0.01, "learning_rate": 9.998062735088488e-05, "loss": 1.2157, "step": 690 }, { "epoch": 0.01, "learning_rate": 9.99802976312437e-05, "loss": 1.1945, "step": 695 }, { "epoch": 0.01, "learning_rate": 9.997996512989237e-05, "loss": 1.1933, "step": 700 }, { "epoch": 0.01, "learning_rate": 9.997962984684943e-05, "loss": 1.1876, "step": 705 }, { "epoch": 0.01, "learning_rate": 9.997929178213352e-05, "loss": 1.1968, "step": 710 }, { "epoch": 0.01, "learning_rate": 9.997895093576347e-05, "loss": 1.2195, "step": 715 }, { "epoch": 0.01, "learning_rate": 9.997860730775824e-05, "loss": 1.2006, "step": 720 }, { "epoch": 0.01, "learning_rate": 9.997826089813695e-05, "loss": 1.1774, "step": 725 }, { "epoch": 0.01, "learning_rate": 9.99779117069189e-05, "loss": 1.2336, "step": 730 }, { "epoch": 0.01, "learning_rate": 9.997755973412352e-05, "loss": 1.1911, "step": 735 }, { "epoch": 0.01, "learning_rate": 9.997720497977038e-05, "loss": 1.2351, "step": 740 }, { "epoch": 0.01, "learning_rate": 9.997684744387922e-05, "loss": 1.1989, "step": 745 }, { "epoch": 0.01, "learning_rate": 9.997648712646999e-05, "loss": 1.2023, "step": 750 }, { "epoch": 0.01, "learning_rate": 9.99761240275627e-05, "loss": 1.1881, "step": 755 }, { "epoch": 0.01, "learning_rate": 9.997575814717754e-05, "loss": 1.1686, "step": 760 }, { "epoch": 0.01, "learning_rate": 9.997538948533491e-05, "loss": 1.1871, "step": 765 }, { "epoch": 0.01, "learning_rate": 9.997501804205533e-05, "loss": 1.206, "step": 770 }, { "epoch": 0.01, "learning_rate": 9.997464381735945e-05, "loss": 1.1834, "step": 775 }, { "epoch": 0.01, "learning_rate": 9.997426681126814e-05, "loss": 1.1843, "step": 780 }, { "epoch": 0.01, "learning_rate": 9.997388702380232e-05, "loss": 1.199, "step": 785 }, { "epoch": 0.01, "learning_rate": 9.99735044549832e-05, "loss": 1.1846, "step": 790 }, { "epoch": 0.01, "learning_rate": 9.9973119104832e-05, "loss": 1.1997, "step": 795 }, { "epoch": 0.01, "learning_rate": 9.99727309733702e-05, "loss": 1.2053, "step": 800 }, { "epoch": 0.01, "learning_rate": 9.99723400606194e-05, "loss": 1.1928, "step": 805 }, { "epoch": 0.01, "learning_rate": 9.997194636660137e-05, "loss": 1.1883, "step": 810 }, { "epoch": 0.01, "learning_rate": 9.997154989133801e-05, "loss": 1.1763, "step": 815 }, { "epoch": 0.01, "learning_rate": 9.997115063485139e-05, "loss": 1.1771, "step": 820 }, { "epoch": 0.01, "learning_rate": 9.997074859716372e-05, "loss": 1.2167, "step": 825 }, { "epoch": 0.01, "learning_rate": 9.997034377829738e-05, "loss": 1.1641, "step": 830 }, { "epoch": 0.01, "learning_rate": 9.996993617827492e-05, "loss": 1.1759, "step": 835 }, { "epoch": 0.01, "learning_rate": 9.996952579711898e-05, "loss": 1.1873, "step": 840 }, { "epoch": 0.01, "learning_rate": 9.996911263485246e-05, "loss": 1.1915, "step": 845 }, { "epoch": 0.01, "learning_rate": 9.99686966914983e-05, "loss": 1.1871, "step": 850 }, { "epoch": 0.01, "learning_rate": 9.996827796707971e-05, "loss": 1.1721, "step": 855 }, { "epoch": 0.01, "learning_rate": 9.996785646161993e-05, "loss": 1.1879, "step": 860 }, { "epoch": 0.01, "learning_rate": 9.996743217514243e-05, "loss": 1.1693, "step": 865 }, { "epoch": 0.01, "learning_rate": 9.996700510767088e-05, "loss": 1.1661, "step": 870 }, { "epoch": 0.01, "learning_rate": 9.9966575259229e-05, "loss": 1.1909, "step": 875 }, { "epoch": 0.01, "learning_rate": 9.996614262984072e-05, "loss": 1.1875, "step": 880 }, { "epoch": 0.01, "learning_rate": 9.996570721953014e-05, "loss": 1.1672, "step": 885 }, { "epoch": 0.01, "learning_rate": 9.996526902832145e-05, "loss": 1.1706, "step": 890 }, { "epoch": 0.01, "learning_rate": 9.996482805623908e-05, "loss": 1.181, "step": 895 }, { "epoch": 0.01, "learning_rate": 9.996438430330755e-05, "loss": 1.1594, "step": 900 }, { "epoch": 0.01, "learning_rate": 9.996393776955158e-05, "loss": 1.174, "step": 905 }, { "epoch": 0.01, "learning_rate": 9.996348845499599e-05, "loss": 1.1857, "step": 910 }, { "epoch": 0.01, "learning_rate": 9.996303635966581e-05, "loss": 1.1919, "step": 915 }, { "epoch": 0.01, "learning_rate": 9.996258148358622e-05, "loss": 1.1871, "step": 920 }, { "epoch": 0.01, "learning_rate": 9.996212382678249e-05, "loss": 1.1691, "step": 925 }, { "epoch": 0.01, "learning_rate": 9.996166338928011e-05, "loss": 1.1621, "step": 930 }, { "epoch": 0.01, "learning_rate": 9.996120017110472e-05, "loss": 1.1804, "step": 935 }, { "epoch": 0.01, "learning_rate": 9.996073417228209e-05, "loss": 1.1686, "step": 940 }, { "epoch": 0.01, "learning_rate": 9.996026539283816e-05, "loss": 1.1713, "step": 945 }, { "epoch": 0.01, "learning_rate": 9.995979383279901e-05, "loss": 1.1661, "step": 950 }, { "epoch": 0.01, "learning_rate": 9.99593194921909e-05, "loss": 1.1869, "step": 955 }, { "epoch": 0.01, "learning_rate": 9.995884237104023e-05, "loss": 1.1507, "step": 960 }, { "epoch": 0.01, "learning_rate": 9.995836246937353e-05, "loss": 1.1755, "step": 965 }, { "epoch": 0.01, "learning_rate": 9.995787978721753e-05, "loss": 1.141, "step": 970 }, { "epoch": 0.01, "learning_rate": 9.995739432459912e-05, "loss": 1.2079, "step": 975 }, { "epoch": 0.01, "learning_rate": 9.995690608154526e-05, "loss": 1.1833, "step": 980 }, { "epoch": 0.01, "learning_rate": 9.995641505808316e-05, "loss": 1.1784, "step": 985 }, { "epoch": 0.01, "learning_rate": 9.995592125424014e-05, "loss": 1.1951, "step": 990 }, { "epoch": 0.01, "learning_rate": 9.99554246700437e-05, "loss": 1.1824, "step": 995 }, { "epoch": 0.01, "learning_rate": 9.995492530552145e-05, "loss": 1.1757, "step": 1000 }, { "epoch": 0.02, "learning_rate": 9.995442316070121e-05, "loss": 1.1649, "step": 1005 }, { "epoch": 0.02, "learning_rate": 9.99539182356109e-05, "loss": 1.1586, "step": 1010 }, { "epoch": 0.02, "learning_rate": 9.995341053027865e-05, "loss": 1.1551, "step": 1015 }, { "epoch": 0.02, "learning_rate": 9.99529000447327e-05, "loss": 1.1689, "step": 1020 }, { "epoch": 0.02, "learning_rate": 9.995238677900145e-05, "loss": 1.1711, "step": 1025 }, { "epoch": 0.02, "learning_rate": 9.995187073311349e-05, "loss": 1.14, "step": 1030 }, { "epoch": 0.02, "learning_rate": 9.995135190709753e-05, "loss": 1.1228, "step": 1035 }, { "epoch": 0.02, "learning_rate": 9.995083030098245e-05, "loss": 1.1304, "step": 1040 }, { "epoch": 0.02, "learning_rate": 9.995030591479728e-05, "loss": 1.1491, "step": 1045 }, { "epoch": 0.02, "learning_rate": 9.99497787485712e-05, "loss": 1.1357, "step": 1050 }, { "epoch": 0.02, "learning_rate": 9.994924880233357e-05, "loss": 1.1683, "step": 1055 }, { "epoch": 0.02, "learning_rate": 9.994871607611385e-05, "loss": 1.1614, "step": 1060 }, { "epoch": 0.02, "learning_rate": 9.99481805699417e-05, "loss": 1.1657, "step": 1065 }, { "epoch": 0.02, "learning_rate": 9.994764228384697e-05, "loss": 1.1719, "step": 1070 }, { "epoch": 0.02, "learning_rate": 9.994710121785956e-05, "loss": 1.146, "step": 1075 }, { "epoch": 0.02, "learning_rate": 9.99465573720096e-05, "loss": 1.1369, "step": 1080 }, { "epoch": 0.02, "learning_rate": 9.994601074632735e-05, "loss": 1.1646, "step": 1085 }, { "epoch": 0.02, "learning_rate": 9.994546134084327e-05, "loss": 1.1445, "step": 1090 }, { "epoch": 0.02, "learning_rate": 9.994490915558791e-05, "loss": 1.1265, "step": 1095 }, { "epoch": 0.02, "learning_rate": 9.994435419059201e-05, "loss": 1.1388, "step": 1100 }, { "epoch": 0.02, "learning_rate": 9.994379644588644e-05, "loss": 1.1522, "step": 1105 }, { "epoch": 0.02, "learning_rate": 9.994323592150225e-05, "loss": 1.1438, "step": 1110 }, { "epoch": 0.02, "learning_rate": 9.994267261747067e-05, "loss": 1.1536, "step": 1115 }, { "epoch": 0.02, "learning_rate": 9.994210653382301e-05, "loss": 1.1578, "step": 1120 }, { "epoch": 0.02, "learning_rate": 9.994153767059077e-05, "loss": 1.1143, "step": 1125 }, { "epoch": 0.02, "learning_rate": 9.994096602780568e-05, "loss": 1.1351, "step": 1130 }, { "epoch": 0.02, "learning_rate": 9.994039160549946e-05, "loss": 1.1481, "step": 1135 }, { "epoch": 0.02, "learning_rate": 9.993981440370415e-05, "loss": 1.1466, "step": 1140 }, { "epoch": 0.02, "learning_rate": 9.993923442245183e-05, "loss": 1.1438, "step": 1145 }, { "epoch": 0.02, "learning_rate": 9.993865166177481e-05, "loss": 1.1599, "step": 1150 }, { "epoch": 0.02, "learning_rate": 9.993806612170553e-05, "loss": 1.1422, "step": 1155 }, { "epoch": 0.02, "learning_rate": 9.993747780227655e-05, "loss": 1.1486, "step": 1160 }, { "epoch": 0.02, "learning_rate": 9.993688670352061e-05, "loss": 1.1449, "step": 1165 }, { "epoch": 0.02, "learning_rate": 9.993629282547066e-05, "loss": 1.1461, "step": 1170 }, { "epoch": 0.02, "learning_rate": 9.99356961681597e-05, "loss": 1.1397, "step": 1175 }, { "epoch": 0.02, "learning_rate": 9.993509673162096e-05, "loss": 1.1736, "step": 1180 }, { "epoch": 0.02, "learning_rate": 9.993449451588779e-05, "loss": 1.1208, "step": 1185 }, { "epoch": 0.02, "learning_rate": 9.993388952099372e-05, "loss": 1.1221, "step": 1190 }, { "epoch": 0.02, "learning_rate": 9.993328174697243e-05, "loss": 1.1434, "step": 1195 }, { "epoch": 0.02, "learning_rate": 9.993267119385771e-05, "loss": 1.1709, "step": 1200 }, { "epoch": 0.02, "learning_rate": 9.993205786168358e-05, "loss": 1.1393, "step": 1205 }, { "epoch": 0.02, "learning_rate": 9.993144175048415e-05, "loss": 1.1599, "step": 1210 }, { "epoch": 0.02, "learning_rate": 9.993082286029374e-05, "loss": 1.1329, "step": 1215 }, { "epoch": 0.02, "learning_rate": 9.993020119114675e-05, "loss": 1.155, "step": 1220 }, { "epoch": 0.02, "learning_rate": 9.992957674307781e-05, "loss": 1.12, "step": 1225 }, { "epoch": 0.02, "learning_rate": 9.992894951612169e-05, "loss": 1.167, "step": 1230 }, { "epoch": 0.02, "learning_rate": 9.992831951031325e-05, "loss": 1.1469, "step": 1235 }, { "epoch": 0.02, "learning_rate": 9.99276867256876e-05, "loss": 1.1625, "step": 1240 }, { "epoch": 0.02, "learning_rate": 9.992705116227993e-05, "loss": 1.1535, "step": 1245 }, { "epoch": 0.02, "learning_rate": 9.992641282012565e-05, "loss": 1.1459, "step": 1250 }, { "epoch": 0.02, "learning_rate": 9.992577169926023e-05, "loss": 1.1446, "step": 1255 }, { "epoch": 0.02, "learning_rate": 9.99251277997194e-05, "loss": 1.1011, "step": 1260 }, { "epoch": 0.02, "learning_rate": 9.992448112153899e-05, "loss": 1.1286, "step": 1265 }, { "epoch": 0.02, "learning_rate": 9.992383166475497e-05, "loss": 1.1356, "step": 1270 }, { "epoch": 0.02, "learning_rate": 9.992317942940348e-05, "loss": 1.1149, "step": 1275 }, { "epoch": 0.02, "learning_rate": 9.992252441552085e-05, "loss": 1.1418, "step": 1280 }, { "epoch": 0.02, "learning_rate": 9.992186662314353e-05, "loss": 1.1129, "step": 1285 }, { "epoch": 0.02, "learning_rate": 9.992120605230813e-05, "loss": 1.1364, "step": 1290 }, { "epoch": 0.02, "learning_rate": 9.99205427030514e-05, "loss": 1.1428, "step": 1295 }, { "epoch": 0.02, "learning_rate": 9.991987657541028e-05, "loss": 1.0988, "step": 1300 }, { "epoch": 0.02, "learning_rate": 9.991920766942184e-05, "loss": 1.1335, "step": 1305 }, { "epoch": 0.02, "learning_rate": 9.991853598512328e-05, "loss": 1.1272, "step": 1310 }, { "epoch": 0.02, "learning_rate": 9.991786152255201e-05, "loss": 1.0964, "step": 1315 }, { "epoch": 0.02, "learning_rate": 9.991718428174557e-05, "loss": 1.1188, "step": 1320 }, { "epoch": 0.02, "learning_rate": 9.991650426274165e-05, "loss": 1.1212, "step": 1325 }, { "epoch": 0.02, "learning_rate": 9.991582146557809e-05, "loss": 1.12, "step": 1330 }, { "epoch": 0.02, "learning_rate": 9.991513589029288e-05, "loss": 1.1465, "step": 1335 }, { "epoch": 0.02, "learning_rate": 9.99144475369242e-05, "loss": 1.1419, "step": 1340 }, { "epoch": 0.02, "learning_rate": 9.991375640551037e-05, "loss": 1.1391, "step": 1345 }, { "epoch": 0.02, "learning_rate": 9.991306249608982e-05, "loss": 1.1016, "step": 1350 }, { "epoch": 0.02, "learning_rate": 9.99123658087012e-05, "loss": 1.1131, "step": 1355 }, { "epoch": 0.02, "learning_rate": 9.991166634338328e-05, "loss": 1.1508, "step": 1360 }, { "epoch": 0.02, "learning_rate": 9.991096410017495e-05, "loss": 1.1262, "step": 1365 }, { "epoch": 0.02, "learning_rate": 9.991025907911535e-05, "loss": 1.1332, "step": 1370 }, { "epoch": 0.02, "learning_rate": 9.99095512802437e-05, "loss": 1.1345, "step": 1375 }, { "epoch": 0.02, "learning_rate": 9.990884070359938e-05, "loss": 1.1456, "step": 1380 }, { "epoch": 0.02, "learning_rate": 9.990812734922195e-05, "loss": 1.1337, "step": 1385 }, { "epoch": 0.02, "learning_rate": 9.99074112171511e-05, "loss": 1.1201, "step": 1390 }, { "epoch": 0.02, "learning_rate": 9.99066923074267e-05, "loss": 1.1322, "step": 1395 }, { "epoch": 0.02, "learning_rate": 9.990597062008878e-05, "loss": 1.1251, "step": 1400 }, { "epoch": 0.02, "learning_rate": 9.990524615517747e-05, "loss": 1.1282, "step": 1405 }, { "epoch": 0.02, "learning_rate": 9.990451891273312e-05, "loss": 1.1634, "step": 1410 }, { "epoch": 0.02, "learning_rate": 9.990378889279616e-05, "loss": 1.1364, "step": 1415 }, { "epoch": 0.02, "learning_rate": 9.990305609540727e-05, "loss": 1.1237, "step": 1420 }, { "epoch": 0.02, "learning_rate": 9.990232052060721e-05, "loss": 1.1294, "step": 1425 }, { "epoch": 0.02, "learning_rate": 9.990158216843694e-05, "loss": 1.1212, "step": 1430 }, { "epoch": 0.02, "learning_rate": 9.990084103893754e-05, "loss": 1.1359, "step": 1435 }, { "epoch": 0.02, "learning_rate": 9.990009713215024e-05, "loss": 1.1191, "step": 1440 }, { "epoch": 0.02, "learning_rate": 9.989935044811646e-05, "loss": 1.1157, "step": 1445 }, { "epoch": 0.02, "learning_rate": 9.989860098687777e-05, "loss": 1.1191, "step": 1450 }, { "epoch": 0.02, "learning_rate": 9.989784874847588e-05, "loss": 1.0954, "step": 1455 }, { "epoch": 0.02, "learning_rate": 9.989709373295263e-05, "loss": 1.098, "step": 1460 }, { "epoch": 0.02, "learning_rate": 9.989633594035006e-05, "loss": 1.1217, "step": 1465 }, { "epoch": 0.02, "learning_rate": 9.989557537071035e-05, "loss": 1.1369, "step": 1470 }, { "epoch": 0.02, "learning_rate": 9.989481202407582e-05, "loss": 1.0882, "step": 1475 }, { "epoch": 0.02, "learning_rate": 9.989404590048897e-05, "loss": 1.1016, "step": 1480 }, { "epoch": 0.02, "learning_rate": 9.98932769999924e-05, "loss": 1.1305, "step": 1485 }, { "epoch": 0.02, "learning_rate": 9.989250532262895e-05, "loss": 1.1073, "step": 1490 }, { "epoch": 0.02, "learning_rate": 9.989173086844157e-05, "loss": 1.1257, "step": 1495 }, { "epoch": 0.02, "learning_rate": 9.989095363747332e-05, "loss": 1.109, "step": 1500 }, { "epoch": 0.02, "learning_rate": 9.989017362976749e-05, "loss": 1.1517, "step": 1505 }, { "epoch": 0.02, "learning_rate": 9.988939084536748e-05, "loss": 1.0908, "step": 1510 }, { "epoch": 0.02, "learning_rate": 9.988860528431687e-05, "loss": 1.1204, "step": 1515 }, { "epoch": 0.02, "learning_rate": 9.988781694665936e-05, "loss": 1.1055, "step": 1520 }, { "epoch": 0.02, "learning_rate": 9.988702583243885e-05, "loss": 1.1212, "step": 1525 }, { "epoch": 0.02, "learning_rate": 9.988623194169937e-05, "loss": 1.1138, "step": 1530 }, { "epoch": 0.02, "learning_rate": 9.988543527448506e-05, "loss": 1.101, "step": 1535 }, { "epoch": 0.02, "learning_rate": 9.988463583084031e-05, "loss": 1.1293, "step": 1540 }, { "epoch": 0.02, "learning_rate": 9.98838336108096e-05, "loss": 1.1094, "step": 1545 }, { "epoch": 0.02, "learning_rate": 9.988302861443758e-05, "loss": 1.1367, "step": 1550 }, { "epoch": 0.02, "learning_rate": 9.988222084176904e-05, "loss": 1.1311, "step": 1555 }, { "epoch": 0.02, "learning_rate": 9.988141029284895e-05, "loss": 1.1178, "step": 1560 }, { "epoch": 0.02, "learning_rate": 9.988059696772242e-05, "loss": 1.0996, "step": 1565 }, { "epoch": 0.02, "learning_rate": 9.98797808664347e-05, "loss": 1.1185, "step": 1570 }, { "epoch": 0.02, "learning_rate": 9.987896198903123e-05, "loss": 1.1165, "step": 1575 }, { "epoch": 0.02, "learning_rate": 9.98781403355576e-05, "loss": 1.1249, "step": 1580 }, { "epoch": 0.02, "learning_rate": 9.987731590605951e-05, "loss": 1.0921, "step": 1585 }, { "epoch": 0.02, "learning_rate": 9.987648870058285e-05, "loss": 1.107, "step": 1590 }, { "epoch": 0.02, "learning_rate": 9.987565871917366e-05, "loss": 1.0984, "step": 1595 }, { "epoch": 0.02, "learning_rate": 9.987482596187816e-05, "loss": 1.1035, "step": 1600 }, { "epoch": 0.02, "learning_rate": 9.987399042874265e-05, "loss": 1.1179, "step": 1605 }, { "epoch": 0.02, "learning_rate": 9.987315211981368e-05, "loss": 1.0903, "step": 1610 }, { "epoch": 0.02, "learning_rate": 9.987231103513788e-05, "loss": 1.1082, "step": 1615 }, { "epoch": 0.02, "learning_rate": 9.987146717476206e-05, "loss": 1.1188, "step": 1620 }, { "epoch": 0.02, "learning_rate": 9.98706205387332e-05, "loss": 1.1086, "step": 1625 }, { "epoch": 0.02, "learning_rate": 9.986977112709842e-05, "loss": 1.125, "step": 1630 }, { "epoch": 0.02, "learning_rate": 9.986891893990498e-05, "loss": 1.0921, "step": 1635 }, { "epoch": 0.02, "learning_rate": 9.986806397720031e-05, "loss": 1.128, "step": 1640 }, { "epoch": 0.02, "learning_rate": 9.986720623903201e-05, "loss": 1.1026, "step": 1645 }, { "epoch": 0.02, "learning_rate": 9.986634572544782e-05, "loss": 1.1297, "step": 1650 }, { "epoch": 0.02, "learning_rate": 9.98654824364956e-05, "loss": 1.1009, "step": 1655 }, { "epoch": 0.02, "learning_rate": 9.986461637222343e-05, "loss": 1.1304, "step": 1660 }, { "epoch": 0.02, "learning_rate": 9.986374753267953e-05, "loss": 1.1161, "step": 1665 }, { "epoch": 0.03, "learning_rate": 9.98628759179122e-05, "loss": 1.107, "step": 1670 }, { "epoch": 0.03, "learning_rate": 9.986200152796998e-05, "loss": 1.0963, "step": 1675 }, { "epoch": 0.03, "learning_rate": 9.986112436290155e-05, "loss": 1.1042, "step": 1680 }, { "epoch": 0.03, "learning_rate": 9.98602444227557e-05, "loss": 1.1222, "step": 1685 }, { "epoch": 0.03, "learning_rate": 9.985936170758143e-05, "loss": 1.12, "step": 1690 }, { "epoch": 0.03, "learning_rate": 9.985847621742786e-05, "loss": 1.0753, "step": 1695 }, { "epoch": 0.03, "learning_rate": 9.985758795234426e-05, "loss": 1.1085, "step": 1700 }, { "epoch": 0.03, "learning_rate": 9.985669691238006e-05, "loss": 1.0874, "step": 1705 }, { "epoch": 0.03, "learning_rate": 9.985580309758489e-05, "loss": 1.107, "step": 1710 }, { "epoch": 0.03, "learning_rate": 9.985490650800848e-05, "loss": 1.0924, "step": 1715 }, { "epoch": 0.03, "learning_rate": 9.985400714370072e-05, "loss": 1.1117, "step": 1720 }, { "epoch": 0.03, "learning_rate": 9.985310500471167e-05, "loss": 1.109, "step": 1725 }, { "epoch": 0.03, "learning_rate": 9.985220009109153e-05, "loss": 1.1092, "step": 1730 }, { "epoch": 0.03, "learning_rate": 9.98512924028907e-05, "loss": 1.0889, "step": 1735 }, { "epoch": 0.03, "learning_rate": 9.985038194015965e-05, "loss": 1.0964, "step": 1740 }, { "epoch": 0.03, "learning_rate": 9.984946870294909e-05, "loss": 1.0751, "step": 1745 }, { "epoch": 0.03, "learning_rate": 9.984855269130983e-05, "loss": 1.1212, "step": 1750 }, { "epoch": 0.03, "learning_rate": 9.984763390529285e-05, "loss": 1.0991, "step": 1755 }, { "epoch": 0.03, "learning_rate": 9.98467123449493e-05, "loss": 1.1127, "step": 1760 }, { "epoch": 0.03, "learning_rate": 9.984578801033044e-05, "loss": 1.1134, "step": 1765 }, { "epoch": 0.03, "learning_rate": 9.984486090148775e-05, "loss": 1.1059, "step": 1770 }, { "epoch": 0.03, "learning_rate": 9.984393101847282e-05, "loss": 1.1109, "step": 1775 }, { "epoch": 0.03, "learning_rate": 9.98429983613374e-05, "loss": 1.1134, "step": 1780 }, { "epoch": 0.03, "learning_rate": 9.98420629301334e-05, "loss": 1.1175, "step": 1785 }, { "epoch": 0.03, "learning_rate": 9.984112472491287e-05, "loss": 1.0792, "step": 1790 }, { "epoch": 0.03, "learning_rate": 9.984018374572804e-05, "loss": 1.1044, "step": 1795 }, { "epoch": 0.03, "learning_rate": 9.983923999263128e-05, "loss": 1.0867, "step": 1800 }, { "epoch": 0.03, "learning_rate": 9.983829346567512e-05, "loss": 1.0776, "step": 1805 }, { "epoch": 0.03, "learning_rate": 9.983734416491221e-05, "loss": 1.1339, "step": 1810 }, { "epoch": 0.03, "learning_rate": 9.983639209039544e-05, "loss": 1.1014, "step": 1815 }, { "epoch": 0.03, "learning_rate": 9.983543724217775e-05, "loss": 1.1496, "step": 1820 }, { "epoch": 0.03, "learning_rate": 9.98344796203123e-05, "loss": 1.0812, "step": 1825 }, { "epoch": 0.03, "learning_rate": 9.98335192248524e-05, "loss": 1.1032, "step": 1830 }, { "epoch": 0.03, "learning_rate": 9.983255605585147e-05, "loss": 1.1036, "step": 1835 }, { "epoch": 0.03, "learning_rate": 9.983159011336314e-05, "loss": 1.1129, "step": 1840 }, { "epoch": 0.03, "learning_rate": 9.983062139744116e-05, "loss": 1.0992, "step": 1845 }, { "epoch": 0.03, "learning_rate": 9.982964990813948e-05, "loss": 1.1146, "step": 1850 }, { "epoch": 0.03, "learning_rate": 9.982867564551212e-05, "loss": 1.0765, "step": 1855 }, { "epoch": 0.03, "learning_rate": 9.982769860961335e-05, "loss": 1.0794, "step": 1860 }, { "epoch": 0.03, "learning_rate": 9.982671880049748e-05, "loss": 1.102, "step": 1865 }, { "epoch": 0.03, "learning_rate": 9.98257362182191e-05, "loss": 1.1095, "step": 1870 }, { "epoch": 0.03, "learning_rate": 9.98247508628329e-05, "loss": 1.0844, "step": 1875 }, { "epoch": 0.03, "learning_rate": 9.98237627343937e-05, "loss": 1.0904, "step": 1880 }, { "epoch": 0.03, "learning_rate": 9.982277183295647e-05, "loss": 1.0835, "step": 1885 }, { "epoch": 0.03, "learning_rate": 9.982177815857641e-05, "loss": 1.1046, "step": 1890 }, { "epoch": 0.03, "learning_rate": 9.982078171130879e-05, "loss": 1.0891, "step": 1895 }, { "epoch": 0.03, "learning_rate": 9.981978249120909e-05, "loss": 1.0909, "step": 1900 }, { "epoch": 0.03, "learning_rate": 9.981878049833291e-05, "loss": 1.0952, "step": 1905 }, { "epoch": 0.03, "learning_rate": 9.981777573273603e-05, "loss": 1.093, "step": 1910 }, { "epoch": 0.03, "learning_rate": 9.981676819447435e-05, "loss": 1.1086, "step": 1915 }, { "epoch": 0.03, "learning_rate": 9.981575788360396e-05, "loss": 1.0958, "step": 1920 }, { "epoch": 0.03, "learning_rate": 9.981474480018109e-05, "loss": 1.0702, "step": 1925 }, { "epoch": 0.03, "learning_rate": 9.981372894426212e-05, "loss": 1.1093, "step": 1930 }, { "epoch": 0.03, "learning_rate": 9.981271031590358e-05, "loss": 1.1163, "step": 1935 }, { "epoch": 0.03, "learning_rate": 9.981168891516219e-05, "loss": 1.0722, "step": 1940 }, { "epoch": 0.03, "learning_rate": 9.981066474209478e-05, "loss": 1.0951, "step": 1945 }, { "epoch": 0.03, "learning_rate": 9.980963779675834e-05, "loss": 1.0643, "step": 1950 }, { "epoch": 0.03, "learning_rate": 9.980860807921004e-05, "loss": 1.0938, "step": 1955 }, { "epoch": 0.03, "learning_rate": 9.980757558950717e-05, "loss": 1.0879, "step": 1960 }, { "epoch": 0.03, "learning_rate": 9.980654032770725e-05, "loss": 1.0685, "step": 1965 }, { "epoch": 0.03, "learning_rate": 9.980550229386782e-05, "loss": 1.0769, "step": 1970 }, { "epoch": 0.03, "learning_rate": 9.980446148804673e-05, "loss": 1.0833, "step": 1975 }, { "epoch": 0.03, "learning_rate": 9.980341791030186e-05, "loss": 1.0963, "step": 1980 }, { "epoch": 0.03, "learning_rate": 9.98023715606913e-05, "loss": 1.0972, "step": 1985 }, { "epoch": 0.03, "learning_rate": 9.980132243927327e-05, "loss": 1.086, "step": 1990 }, { "epoch": 0.03, "learning_rate": 9.98002705461062e-05, "loss": 1.095, "step": 1995 }, { "epoch": 0.03, "learning_rate": 9.979921588124861e-05, "loss": 1.0916, "step": 2000 }, { "epoch": 0.03, "learning_rate": 9.97981584447592e-05, "loss": 1.0984, "step": 2005 }, { "epoch": 0.03, "learning_rate": 9.979709823669682e-05, "loss": 1.0959, "step": 2010 }, { "epoch": 0.03, "learning_rate": 9.979603525712048e-05, "loss": 1.097, "step": 2015 }, { "epoch": 0.03, "learning_rate": 9.979496950608936e-05, "loss": 1.0754, "step": 2020 }, { "epoch": 0.03, "learning_rate": 9.979390098366273e-05, "loss": 1.09, "step": 2025 }, { "epoch": 0.03, "learning_rate": 9.979282968990009e-05, "loss": 1.0705, "step": 2030 }, { "epoch": 0.03, "learning_rate": 9.979175562486108e-05, "loss": 1.1052, "step": 2035 }, { "epoch": 0.03, "learning_rate": 9.979067878860545e-05, "loss": 1.0919, "step": 2040 }, { "epoch": 0.03, "learning_rate": 9.978959918119315e-05, "loss": 1.0852, "step": 2045 }, { "epoch": 0.03, "learning_rate": 9.978851680268425e-05, "loss": 1.096, "step": 2050 }, { "epoch": 0.03, "learning_rate": 9.978743165313901e-05, "loss": 1.0863, "step": 2055 }, { "epoch": 0.03, "learning_rate": 9.978634373261782e-05, "loss": 1.0914, "step": 2060 }, { "epoch": 0.03, "learning_rate": 9.978525304118121e-05, "loss": 1.0636, "step": 2065 }, { "epoch": 0.03, "learning_rate": 9.978415957888989e-05, "loss": 1.0604, "step": 2070 }, { "epoch": 0.03, "learning_rate": 9.978306334580475e-05, "loss": 1.0869, "step": 2075 }, { "epoch": 0.03, "learning_rate": 9.978196434198677e-05, "loss": 1.0739, "step": 2080 }, { "epoch": 0.03, "learning_rate": 9.978086256749712e-05, "loss": 1.0859, "step": 2085 }, { "epoch": 0.03, "learning_rate": 9.977975802239712e-05, "loss": 1.0872, "step": 2090 }, { "epoch": 0.03, "learning_rate": 9.977865070674828e-05, "loss": 1.0964, "step": 2095 }, { "epoch": 0.03, "learning_rate": 9.977754062061218e-05, "loss": 1.0658, "step": 2100 }, { "epoch": 0.03, "learning_rate": 9.977642776405061e-05, "loss": 1.0931, "step": 2105 }, { "epoch": 0.03, "learning_rate": 9.977531213712553e-05, "loss": 1.0898, "step": 2110 }, { "epoch": 0.03, "learning_rate": 9.977419373989903e-05, "loss": 1.0722, "step": 2115 }, { "epoch": 0.03, "learning_rate": 9.977307257243334e-05, "loss": 1.1016, "step": 2120 }, { "epoch": 0.03, "learning_rate": 9.977194863479087e-05, "loss": 1.0638, "step": 2125 }, { "epoch": 0.03, "learning_rate": 9.977082192703415e-05, "loss": 1.1018, "step": 2130 }, { "epoch": 0.03, "learning_rate": 9.976969244922594e-05, "loss": 1.0768, "step": 2135 }, { "epoch": 0.03, "learning_rate": 9.976856020142904e-05, "loss": 1.0857, "step": 2140 }, { "epoch": 0.03, "learning_rate": 9.976742518370652e-05, "loss": 1.0742, "step": 2145 }, { "epoch": 0.03, "learning_rate": 9.976628739612153e-05, "loss": 1.0741, "step": 2150 }, { "epoch": 0.03, "learning_rate": 9.976514683873738e-05, "loss": 1.0934, "step": 2155 }, { "epoch": 0.03, "learning_rate": 9.976400351161757e-05, "loss": 1.1107, "step": 2160 }, { "epoch": 0.03, "learning_rate": 9.976285741482571e-05, "loss": 1.0965, "step": 2165 }, { "epoch": 0.03, "learning_rate": 9.976170854842562e-05, "loss": 1.0842, "step": 2170 }, { "epoch": 0.03, "learning_rate": 9.976055691248123e-05, "loss": 1.0811, "step": 2175 }, { "epoch": 0.03, "learning_rate": 9.975940250705661e-05, "loss": 1.0691, "step": 2180 }, { "epoch": 0.03, "learning_rate": 9.975824533221604e-05, "loss": 1.0831, "step": 2185 }, { "epoch": 0.03, "learning_rate": 9.975708538802391e-05, "loss": 1.0532, "step": 2190 }, { "epoch": 0.03, "learning_rate": 9.975592267454478e-05, "loss": 1.0706, "step": 2195 }, { "epoch": 0.03, "learning_rate": 9.975475719184337e-05, "loss": 1.0703, "step": 2200 }, { "epoch": 0.03, "learning_rate": 9.975358893998454e-05, "loss": 1.0746, "step": 2205 }, { "epoch": 0.03, "learning_rate": 9.975241791903331e-05, "loss": 1.0689, "step": 2210 }, { "epoch": 0.03, "learning_rate": 9.975124412905485e-05, "loss": 1.0691, "step": 2215 }, { "epoch": 0.03, "learning_rate": 9.975006757011449e-05, "loss": 1.067, "step": 2220 }, { "epoch": 0.03, "learning_rate": 9.974888824227773e-05, "loss": 1.0699, "step": 2225 }, { "epoch": 0.03, "learning_rate": 9.974770614561017e-05, "loss": 1.0704, "step": 2230 }, { "epoch": 0.03, "learning_rate": 9.974652128017765e-05, "loss": 1.0772, "step": 2235 }, { "epoch": 0.03, "learning_rate": 9.974533364604608e-05, "loss": 1.0628, "step": 2240 }, { "epoch": 0.03, "learning_rate": 9.974414324328156e-05, "loss": 1.0731, "step": 2245 }, { "epoch": 0.03, "learning_rate": 9.974295007195036e-05, "loss": 1.0691, "step": 2250 }, { "epoch": 0.03, "learning_rate": 9.974175413211886e-05, "loss": 1.0932, "step": 2255 }, { "epoch": 0.03, "learning_rate": 9.974055542385366e-05, "loss": 1.0755, "step": 2260 }, { "epoch": 0.03, "learning_rate": 9.973935394722144e-05, "loss": 1.0848, "step": 2265 }, { "epoch": 0.03, "learning_rate": 9.97381497022891e-05, "loss": 1.0695, "step": 2270 }, { "epoch": 0.03, "learning_rate": 9.973694268912364e-05, "loss": 1.0692, "step": 2275 }, { "epoch": 0.03, "learning_rate": 9.973573290779226e-05, "loss": 1.0755, "step": 2280 }, { "epoch": 0.03, "learning_rate": 9.973452035836227e-05, "loss": 1.0968, "step": 2285 }, { "epoch": 0.03, "learning_rate": 9.973330504090115e-05, "loss": 1.0945, "step": 2290 }, { "epoch": 0.03, "learning_rate": 9.973208695547656e-05, "loss": 1.0471, "step": 2295 }, { "epoch": 0.03, "learning_rate": 9.97308661021563e-05, "loss": 1.0915, "step": 2300 }, { "epoch": 0.03, "learning_rate": 9.972964248100829e-05, "loss": 1.0827, "step": 2305 }, { "epoch": 0.03, "learning_rate": 9.972841609210067e-05, "loss": 1.0597, "step": 2310 }, { "epoch": 0.03, "learning_rate": 9.972718693550165e-05, "loss": 1.057, "step": 2315 }, { "epoch": 0.03, "learning_rate": 9.972595501127966e-05, "loss": 1.0775, "step": 2320 }, { "epoch": 0.03, "learning_rate": 9.972472031950329e-05, "loss": 1.078, "step": 2325 }, { "epoch": 0.03, "learning_rate": 9.972348286024122e-05, "loss": 1.0855, "step": 2330 }, { "epoch": 0.04, "learning_rate": 9.972224263356235e-05, "loss": 1.0959, "step": 2335 }, { "epoch": 0.04, "learning_rate": 9.972099963953569e-05, "loss": 1.0529, "step": 2340 }, { "epoch": 0.04, "learning_rate": 9.971975387823042e-05, "loss": 1.0671, "step": 2345 }, { "epoch": 0.04, "learning_rate": 9.971850534971587e-05, "loss": 1.0622, "step": 2350 }, { "epoch": 0.04, "learning_rate": 9.971725405406155e-05, "loss": 1.0293, "step": 2355 }, { "epoch": 0.04, "learning_rate": 9.97159999913371e-05, "loss": 1.0593, "step": 2360 }, { "epoch": 0.04, "learning_rate": 9.971474316161229e-05, "loss": 1.0586, "step": 2365 }, { "epoch": 0.04, "learning_rate": 9.971348356495709e-05, "loss": 1.0799, "step": 2370 }, { "epoch": 0.04, "learning_rate": 9.971222120144159e-05, "loss": 1.0533, "step": 2375 }, { "epoch": 0.04, "learning_rate": 9.971095607113607e-05, "loss": 1.0392, "step": 2380 }, { "epoch": 0.04, "learning_rate": 9.970968817411093e-05, "loss": 1.0844, "step": 2385 }, { "epoch": 0.04, "learning_rate": 9.970841751043673e-05, "loss": 1.0995, "step": 2390 }, { "epoch": 0.04, "learning_rate": 9.970714408018422e-05, "loss": 1.059, "step": 2395 }, { "epoch": 0.04, "learning_rate": 9.970586788342423e-05, "loss": 1.0795, "step": 2400 }, { "epoch": 0.04, "learning_rate": 9.970458892022783e-05, "loss": 1.0399, "step": 2405 }, { "epoch": 0.04, "learning_rate": 9.970330719066616e-05, "loss": 1.0759, "step": 2410 }, { "epoch": 0.04, "learning_rate": 9.970202269481058e-05, "loss": 1.0893, "step": 2415 }, { "epoch": 0.04, "learning_rate": 9.970073543273259e-05, "loss": 1.0777, "step": 2420 }, { "epoch": 0.04, "learning_rate": 9.969944540450382e-05, "loss": 1.0742, "step": 2425 }, { "epoch": 0.04, "learning_rate": 9.969815261019607e-05, "loss": 1.0633, "step": 2430 }, { "epoch": 0.04, "learning_rate": 9.969685704988128e-05, "loss": 1.0663, "step": 2435 }, { "epoch": 0.04, "learning_rate": 9.969555872363157e-05, "loss": 1.0527, "step": 2440 }, { "epoch": 0.04, "learning_rate": 9.969425763151921e-05, "loss": 1.0772, "step": 2445 }, { "epoch": 0.04, "learning_rate": 9.969295377361658e-05, "loss": 1.0915, "step": 2450 }, { "epoch": 0.04, "learning_rate": 9.969164714999628e-05, "loss": 1.0643, "step": 2455 }, { "epoch": 0.04, "learning_rate": 9.969033776073102e-05, "loss": 1.0536, "step": 2460 }, { "epoch": 0.04, "learning_rate": 9.968902560589368e-05, "loss": 1.059, "step": 2465 }, { "epoch": 0.04, "learning_rate": 9.968771068555729e-05, "loss": 1.0783, "step": 2470 }, { "epoch": 0.04, "learning_rate": 9.968639299979503e-05, "loss": 1.0988, "step": 2475 }, { "epoch": 0.04, "learning_rate": 9.968507254868023e-05, "loss": 1.0745, "step": 2480 }, { "epoch": 0.04, "learning_rate": 9.96837493322864e-05, "loss": 1.0484, "step": 2485 }, { "epoch": 0.04, "learning_rate": 9.968242335068716e-05, "loss": 1.0568, "step": 2490 }, { "epoch": 0.04, "learning_rate": 9.968109460395631e-05, "loss": 1.0445, "step": 2495 }, { "epoch": 0.04, "learning_rate": 9.967976309216783e-05, "loss": 1.0738, "step": 2500 }, { "epoch": 0.04, "learning_rate": 9.967842881539582e-05, "loss": 1.0444, "step": 2505 }, { "epoch": 0.04, "learning_rate": 9.967709177371451e-05, "loss": 1.0589, "step": 2510 }, { "epoch": 0.04, "learning_rate": 9.967575196719836e-05, "loss": 1.0745, "step": 2515 }, { "epoch": 0.04, "learning_rate": 9.96744093959219e-05, "loss": 1.0612, "step": 2520 }, { "epoch": 0.04, "learning_rate": 9.967306405995986e-05, "loss": 1.0736, "step": 2525 }, { "epoch": 0.04, "learning_rate": 9.967171595938714e-05, "loss": 1.0711, "step": 2530 }, { "epoch": 0.04, "learning_rate": 9.967036509427876e-05, "loss": 1.0623, "step": 2535 }, { "epoch": 0.04, "learning_rate": 9.966901146470989e-05, "loss": 1.0621, "step": 2540 }, { "epoch": 0.04, "learning_rate": 9.966765507075587e-05, "loss": 1.079, "step": 2545 }, { "epoch": 0.04, "learning_rate": 9.966629591249219e-05, "loss": 1.0515, "step": 2550 }, { "epoch": 0.04, "learning_rate": 9.96649339899945e-05, "loss": 1.0652, "step": 2555 }, { "epoch": 0.04, "learning_rate": 9.966356930333863e-05, "loss": 1.0877, "step": 2560 }, { "epoch": 0.04, "learning_rate": 9.96622018526005e-05, "loss": 1.0641, "step": 2565 }, { "epoch": 0.04, "learning_rate": 9.96608316378562e-05, "loss": 1.0687, "step": 2570 }, { "epoch": 0.04, "learning_rate": 9.965945865918202e-05, "loss": 1.0705, "step": 2575 }, { "epoch": 0.04, "learning_rate": 9.965808291665438e-05, "loss": 1.0713, "step": 2580 }, { "epoch": 0.04, "learning_rate": 9.965670441034982e-05, "loss": 1.031, "step": 2585 }, { "epoch": 0.04, "learning_rate": 9.965532314034508e-05, "loss": 1.0984, "step": 2590 }, { "epoch": 0.04, "learning_rate": 9.965393910671705e-05, "loss": 1.0649, "step": 2595 }, { "epoch": 0.04, "learning_rate": 9.965255230954272e-05, "loss": 1.0676, "step": 2600 }, { "epoch": 0.04, "learning_rate": 9.965116274889932e-05, "loss": 1.0557, "step": 2605 }, { "epoch": 0.04, "learning_rate": 9.964977042486416e-05, "loss": 1.0759, "step": 2610 }, { "epoch": 0.04, "learning_rate": 9.964837533751475e-05, "loss": 1.0821, "step": 2615 }, { "epoch": 0.04, "learning_rate": 9.96469774869287e-05, "loss": 1.063, "step": 2620 }, { "epoch": 0.04, "learning_rate": 9.964557687318384e-05, "loss": 1.034, "step": 2625 }, { "epoch": 0.04, "learning_rate": 9.96441734963581e-05, "loss": 1.0599, "step": 2630 }, { "epoch": 0.04, "learning_rate": 9.964276735652962e-05, "loss": 1.0513, "step": 2635 }, { "epoch": 0.04, "learning_rate": 9.964135845377665e-05, "loss": 1.0704, "step": 2640 }, { "epoch": 0.04, "learning_rate": 9.963994678817757e-05, "loss": 1.0632, "step": 2645 }, { "epoch": 0.04, "learning_rate": 9.9638532359811e-05, "loss": 1.0582, "step": 2650 }, { "epoch": 0.04, "learning_rate": 9.963711516875562e-05, "loss": 1.0491, "step": 2655 }, { "epoch": 0.04, "learning_rate": 9.963569521509031e-05, "loss": 1.0821, "step": 2660 }, { "epoch": 0.04, "learning_rate": 9.963427249889412e-05, "loss": 1.0569, "step": 2665 }, { "epoch": 0.04, "learning_rate": 9.963284702024622e-05, "loss": 1.0831, "step": 2670 }, { "epoch": 0.04, "learning_rate": 9.963141877922595e-05, "loss": 1.0563, "step": 2675 }, { "epoch": 0.04, "learning_rate": 9.962998777591279e-05, "loss": 1.0764, "step": 2680 }, { "epoch": 0.04, "learning_rate": 9.962855401038641e-05, "loss": 1.072, "step": 2685 }, { "epoch": 0.04, "learning_rate": 9.962711748272658e-05, "loss": 1.0592, "step": 2690 }, { "epoch": 0.04, "learning_rate": 9.962567819301327e-05, "loss": 1.0605, "step": 2695 }, { "epoch": 0.04, "learning_rate": 9.962423614132657e-05, "loss": 1.0474, "step": 2700 }, { "epoch": 0.04, "learning_rate": 9.962279132774674e-05, "loss": 1.0657, "step": 2705 }, { "epoch": 0.04, "learning_rate": 9.962134375235422e-05, "loss": 1.0645, "step": 2710 }, { "epoch": 0.04, "learning_rate": 9.961989341522954e-05, "loss": 1.0565, "step": 2715 }, { "epoch": 0.04, "learning_rate": 9.961844031645344e-05, "loss": 1.0591, "step": 2720 }, { "epoch": 0.04, "learning_rate": 9.961698445610679e-05, "loss": 1.0873, "step": 2725 }, { "epoch": 0.04, "learning_rate": 9.961552583427063e-05, "loss": 1.0702, "step": 2730 }, { "epoch": 0.04, "learning_rate": 9.961406445102611e-05, "loss": 1.0745, "step": 2735 }, { "epoch": 0.04, "learning_rate": 9.961260030645459e-05, "loss": 1.0666, "step": 2740 }, { "epoch": 0.04, "learning_rate": 9.961113340063756e-05, "loss": 1.0609, "step": 2745 }, { "epoch": 0.04, "learning_rate": 9.960966373365666e-05, "loss": 1.0465, "step": 2750 }, { "epoch": 0.04, "learning_rate": 9.960819130559365e-05, "loss": 1.0658, "step": 2755 }, { "epoch": 0.04, "learning_rate": 9.960671611653054e-05, "loss": 1.0623, "step": 2760 }, { "epoch": 0.04, "learning_rate": 9.960523816654938e-05, "loss": 1.0574, "step": 2765 }, { "epoch": 0.04, "learning_rate": 9.960375745573247e-05, "loss": 1.0297, "step": 2770 }, { "epoch": 0.04, "learning_rate": 9.96022739841622e-05, "loss": 1.0453, "step": 2775 }, { "epoch": 0.04, "learning_rate": 9.960078775192113e-05, "loss": 1.0549, "step": 2780 }, { "epoch": 0.04, "learning_rate": 9.959929875909199e-05, "loss": 1.0602, "step": 2785 }, { "epoch": 0.04, "learning_rate": 9.959780700575763e-05, "loss": 1.0539, "step": 2790 }, { "epoch": 0.04, "learning_rate": 9.959631249200109e-05, "loss": 1.0496, "step": 2795 }, { "epoch": 0.04, "learning_rate": 9.959481521790557e-05, "loss": 1.0765, "step": 2800 }, { "epoch": 0.04, "learning_rate": 9.959331518355437e-05, "loss": 1.0439, "step": 2805 }, { "epoch": 0.04, "learning_rate": 9.9591812389031e-05, "loss": 1.0538, "step": 2810 }, { "epoch": 0.04, "learning_rate": 9.959030683441908e-05, "loss": 1.0317, "step": 2815 }, { "epoch": 0.04, "learning_rate": 9.95887985198024e-05, "loss": 1.0719, "step": 2820 }, { "epoch": 0.04, "learning_rate": 9.958728744526493e-05, "loss": 1.0639, "step": 2825 }, { "epoch": 0.04, "learning_rate": 9.958577361089074e-05, "loss": 1.0419, "step": 2830 }, { "epoch": 0.04, "learning_rate": 9.958425701676413e-05, "loss": 1.0461, "step": 2835 }, { "epoch": 0.04, "learning_rate": 9.958273766296945e-05, "loss": 1.0561, "step": 2840 }, { "epoch": 0.04, "learning_rate": 9.958121554959133e-05, "loss": 1.0307, "step": 2845 }, { "epoch": 0.04, "learning_rate": 9.957969067671442e-05, "loss": 1.0563, "step": 2850 }, { "epoch": 0.04, "learning_rate": 9.957816304442361e-05, "loss": 1.0336, "step": 2855 }, { "epoch": 0.04, "learning_rate": 9.957663265280395e-05, "loss": 1.0482, "step": 2860 }, { "epoch": 0.04, "learning_rate": 9.957509950194058e-05, "loss": 1.0741, "step": 2865 }, { "epoch": 0.04, "learning_rate": 9.957356359191882e-05, "loss": 1.063, "step": 2870 }, { "epoch": 0.04, "learning_rate": 9.957202492282421e-05, "loss": 1.0706, "step": 2875 }, { "epoch": 0.04, "learning_rate": 9.957048349474234e-05, "loss": 1.0396, "step": 2880 }, { "epoch": 0.04, "learning_rate": 9.956893930775901e-05, "loss": 1.058, "step": 2885 }, { "epoch": 0.04, "learning_rate": 9.956739236196016e-05, "loss": 1.0551, "step": 2890 }, { "epoch": 0.04, "learning_rate": 9.95658426574319e-05, "loss": 1.0622, "step": 2895 }, { "epoch": 0.04, "learning_rate": 9.956429019426048e-05, "loss": 1.0685, "step": 2900 }, { "epoch": 0.04, "learning_rate": 9.956273497253228e-05, "loss": 1.0745, "step": 2905 }, { "epoch": 0.04, "learning_rate": 9.956117699233387e-05, "loss": 1.0565, "step": 2910 }, { "epoch": 0.04, "learning_rate": 9.955961625375198e-05, "loss": 1.0249, "step": 2915 }, { "epoch": 0.04, "learning_rate": 9.955805275687347e-05, "loss": 1.0748, "step": 2920 }, { "epoch": 0.04, "learning_rate": 9.955648650178533e-05, "loss": 1.0276, "step": 2925 }, { "epoch": 0.04, "learning_rate": 9.955491748857475e-05, "loss": 1.0557, "step": 2930 }, { "epoch": 0.04, "learning_rate": 9.955334571732906e-05, "loss": 1.0437, "step": 2935 }, { "epoch": 0.04, "learning_rate": 9.955177118813573e-05, "loss": 1.0843, "step": 2940 }, { "epoch": 0.04, "learning_rate": 9.95501939010824e-05, "loss": 1.0517, "step": 2945 }, { "epoch": 0.04, "learning_rate": 9.954861385625684e-05, "loss": 1.0737, "step": 2950 }, { "epoch": 0.04, "learning_rate": 9.9547031053747e-05, "loss": 1.0374, "step": 2955 }, { "epoch": 0.04, "learning_rate": 9.9545445493641e-05, "loss": 1.0485, "step": 2960 }, { "epoch": 0.04, "learning_rate": 9.954385717602704e-05, "loss": 1.047, "step": 2965 }, { "epoch": 0.04, "learning_rate": 9.954226610099354e-05, "loss": 1.0705, "step": 2970 }, { "epoch": 0.04, "learning_rate": 9.954067226862907e-05, "loss": 1.066, "step": 2975 }, { "epoch": 0.04, "learning_rate": 9.95390756790223e-05, "loss": 1.0545, "step": 2980 }, { "epoch": 0.04, "learning_rate": 9.953747633226212e-05, "loss": 1.0682, "step": 2985 }, { "epoch": 0.04, "learning_rate": 9.953587422843751e-05, "loss": 1.0516, "step": 2990 }, { "epoch": 0.04, "learning_rate": 9.953426936763768e-05, "loss": 1.0342, "step": 2995 }, { "epoch": 0.04, "learning_rate": 9.953266174995192e-05, "loss": 1.06, "step": 3000 }, { "epoch": 0.05, "learning_rate": 9.953105137546972e-05, "loss": 1.056, "step": 3005 }, { "epoch": 0.05, "learning_rate": 9.95294382442807e-05, "loss": 1.0542, "step": 3010 }, { "epoch": 0.05, "learning_rate": 9.952782235647465e-05, "loss": 1.0528, "step": 3015 }, { "epoch": 0.05, "learning_rate": 9.952620371214147e-05, "loss": 1.0574, "step": 3020 }, { "epoch": 0.05, "learning_rate": 9.95245823113713e-05, "loss": 1.0465, "step": 3025 }, { "epoch": 0.05, "learning_rate": 9.952295815425433e-05, "loss": 1.0288, "step": 3030 }, { "epoch": 0.05, "learning_rate": 9.9521331240881e-05, "loss": 1.0213, "step": 3035 }, { "epoch": 0.05, "learning_rate": 9.951970157134181e-05, "loss": 1.0303, "step": 3040 }, { "epoch": 0.05, "learning_rate": 9.95180691457275e-05, "loss": 1.026, "step": 3045 }, { "epoch": 0.05, "learning_rate": 9.951643396412893e-05, "loss": 1.0279, "step": 3050 }, { "epoch": 0.05, "learning_rate": 9.951479602663707e-05, "loss": 1.0342, "step": 3055 }, { "epoch": 0.05, "learning_rate": 9.95131553333431e-05, "loss": 1.0596, "step": 3060 }, { "epoch": 0.05, "learning_rate": 9.951151188433833e-05, "loss": 1.0289, "step": 3065 }, { "epoch": 0.05, "learning_rate": 9.950986567971426e-05, "loss": 1.0556, "step": 3070 }, { "epoch": 0.05, "learning_rate": 9.950821671956246e-05, "loss": 1.0523, "step": 3075 }, { "epoch": 0.05, "learning_rate": 9.950656500397472e-05, "loss": 1.047, "step": 3080 }, { "epoch": 0.05, "learning_rate": 9.950491053304301e-05, "loss": 1.0335, "step": 3085 }, { "epoch": 0.05, "learning_rate": 9.950325330685936e-05, "loss": 1.0247, "step": 3090 }, { "epoch": 0.05, "learning_rate": 9.950159332551603e-05, "loss": 1.0597, "step": 3095 }, { "epoch": 0.05, "learning_rate": 9.949993058910539e-05, "loss": 1.0273, "step": 3100 }, { "epoch": 0.05, "learning_rate": 9.949826509772e-05, "loss": 1.063, "step": 3105 }, { "epoch": 0.05, "learning_rate": 9.949659685145255e-05, "loss": 1.0298, "step": 3110 }, { "epoch": 0.05, "learning_rate": 9.949492585039588e-05, "loss": 1.0295, "step": 3115 }, { "epoch": 0.05, "learning_rate": 9.9493252094643e-05, "loss": 1.0316, "step": 3120 }, { "epoch": 0.05, "learning_rate": 9.949157558428707e-05, "loss": 1.0494, "step": 3125 }, { "epoch": 0.05, "learning_rate": 9.948989631942139e-05, "loss": 1.0634, "step": 3130 }, { "epoch": 0.05, "learning_rate": 9.948821430013941e-05, "loss": 1.0442, "step": 3135 }, { "epoch": 0.05, "learning_rate": 9.948652952653476e-05, "loss": 1.0406, "step": 3140 }, { "epoch": 0.05, "learning_rate": 9.948484199870122e-05, "loss": 1.0599, "step": 3145 }, { "epoch": 0.05, "learning_rate": 9.948315171673268e-05, "loss": 1.0358, "step": 3150 }, { "epoch": 0.05, "learning_rate": 9.948145868072324e-05, "loss": 1.0495, "step": 3155 }, { "epoch": 0.05, "learning_rate": 9.947976289076712e-05, "loss": 1.0484, "step": 3160 }, { "epoch": 0.05, "learning_rate": 9.947806434695868e-05, "loss": 1.0632, "step": 3165 }, { "epoch": 0.05, "learning_rate": 9.947636304939249e-05, "loss": 1.0618, "step": 3170 }, { "epoch": 0.05, "learning_rate": 9.947465899816321e-05, "loss": 1.051, "step": 3175 }, { "epoch": 0.05, "learning_rate": 9.947295219336571e-05, "loss": 1.0657, "step": 3180 }, { "epoch": 0.05, "learning_rate": 9.947124263509496e-05, "loss": 1.0712, "step": 3185 }, { "epoch": 0.05, "learning_rate": 9.94695303234461e-05, "loss": 1.0492, "step": 3190 }, { "epoch": 0.05, "learning_rate": 9.946781525851447e-05, "loss": 1.0498, "step": 3195 }, { "epoch": 0.05, "learning_rate": 9.946609744039547e-05, "loss": 1.0888, "step": 3200 }, { "epoch": 0.05, "learning_rate": 9.946437686918475e-05, "loss": 1.0534, "step": 3205 }, { "epoch": 0.05, "learning_rate": 9.946265354497805e-05, "loss": 1.0439, "step": 3210 }, { "epoch": 0.05, "learning_rate": 9.94609274678713e-05, "loss": 1.0249, "step": 3215 }, { "epoch": 0.05, "learning_rate": 9.945919863796056e-05, "loss": 1.021, "step": 3220 }, { "epoch": 0.05, "learning_rate": 9.945746705534204e-05, "loss": 1.0375, "step": 3225 }, { "epoch": 0.05, "learning_rate": 9.94557327201121e-05, "loss": 1.0588, "step": 3230 }, { "epoch": 0.05, "learning_rate": 9.945399563236732e-05, "loss": 1.0261, "step": 3235 }, { "epoch": 0.05, "learning_rate": 9.945225579220432e-05, "loss": 1.0317, "step": 3240 }, { "epoch": 0.05, "learning_rate": 9.945051319971996e-05, "loss": 1.0498, "step": 3245 }, { "epoch": 0.05, "learning_rate": 9.944876785501124e-05, "loss": 1.0589, "step": 3250 }, { "epoch": 0.05, "learning_rate": 9.944701975817528e-05, "loss": 1.0406, "step": 3255 }, { "epoch": 0.05, "learning_rate": 9.944526890930936e-05, "loss": 1.0222, "step": 3260 }, { "epoch": 0.05, "learning_rate": 9.944351530851095e-05, "loss": 1.0494, "step": 3265 }, { "epoch": 0.05, "learning_rate": 9.944175895587764e-05, "loss": 0.9962, "step": 3270 }, { "epoch": 0.05, "learning_rate": 9.943999985150718e-05, "loss": 1.0668, "step": 3275 }, { "epoch": 0.05, "learning_rate": 9.943823799549747e-05, "loss": 1.0624, "step": 3280 }, { "epoch": 0.05, "learning_rate": 9.943647338794659e-05, "loss": 1.0574, "step": 3285 }, { "epoch": 0.05, "learning_rate": 9.943470602895273e-05, "loss": 1.0297, "step": 3290 }, { "epoch": 0.05, "learning_rate": 9.943293591861425e-05, "loss": 1.0572, "step": 3295 }, { "epoch": 0.05, "learning_rate": 9.943116305702969e-05, "loss": 1.0403, "step": 3300 }, { "epoch": 0.05, "learning_rate": 9.94293874442977e-05, "loss": 1.0554, "step": 3305 }, { "epoch": 0.05, "learning_rate": 9.942760908051711e-05, "loss": 1.0595, "step": 3310 }, { "epoch": 0.05, "learning_rate": 9.94258279657869e-05, "loss": 1.0409, "step": 3315 }, { "epoch": 0.05, "learning_rate": 9.942404410020621e-05, "loss": 1.0153, "step": 3320 }, { "epoch": 0.05, "learning_rate": 9.942225748387431e-05, "loss": 1.0439, "step": 3325 }, { "epoch": 0.05, "learning_rate": 9.942046811689064e-05, "loss": 1.0395, "step": 3330 }, { "epoch": 0.05, "learning_rate": 9.941867599935478e-05, "loss": 1.0706, "step": 3335 }, { "epoch": 0.05, "learning_rate": 9.941688113136646e-05, "loss": 1.0384, "step": 3340 }, { "epoch": 0.05, "learning_rate": 9.941508351302562e-05, "loss": 1.0343, "step": 3345 }, { "epoch": 0.05, "learning_rate": 9.941328314443228e-05, "loss": 1.0436, "step": 3350 }, { "epoch": 0.05, "learning_rate": 9.941148002568665e-05, "loss": 1.0598, "step": 3355 }, { "epoch": 0.05, "learning_rate": 9.940967415688907e-05, "loss": 1.0593, "step": 3360 }, { "epoch": 0.05, "learning_rate": 9.940786553814005e-05, "loss": 1.0434, "step": 3365 }, { "epoch": 0.05, "learning_rate": 9.940605416954028e-05, "loss": 1.0316, "step": 3370 }, { "epoch": 0.05, "learning_rate": 9.940424005119054e-05, "loss": 1.0338, "step": 3375 }, { "epoch": 0.05, "learning_rate": 9.940242318319181e-05, "loss": 1.0682, "step": 3380 }, { "epoch": 0.05, "learning_rate": 9.940060356564521e-05, "loss": 1.0579, "step": 3385 }, { "epoch": 0.05, "learning_rate": 9.9398781198652e-05, "loss": 1.0514, "step": 3390 }, { "epoch": 0.05, "learning_rate": 9.939695608231363e-05, "loss": 1.0356, "step": 3395 }, { "epoch": 0.05, "learning_rate": 9.939512821673167e-05, "loss": 1.029, "step": 3400 }, { "epoch": 0.05, "learning_rate": 9.939329760200784e-05, "loss": 1.0286, "step": 3405 }, { "epoch": 0.05, "learning_rate": 9.939146423824404e-05, "loss": 1.0399, "step": 3410 }, { "epoch": 0.05, "learning_rate": 9.93896281255423e-05, "loss": 1.0531, "step": 3415 }, { "epoch": 0.05, "learning_rate": 9.93877892640048e-05, "loss": 1.0308, "step": 3420 }, { "epoch": 0.05, "learning_rate": 9.93859476537339e-05, "loss": 1.0352, "step": 3425 }, { "epoch": 0.05, "learning_rate": 9.938410329483209e-05, "loss": 1.0615, "step": 3430 }, { "epoch": 0.05, "learning_rate": 9.938225618740203e-05, "loss": 1.061, "step": 3435 }, { "epoch": 0.05, "learning_rate": 9.938040633154651e-05, "loss": 1.0408, "step": 3440 }, { "epoch": 0.05, "learning_rate": 9.937855372736849e-05, "loss": 1.0575, "step": 3445 }, { "epoch": 0.05, "learning_rate": 9.937669837497109e-05, "loss": 1.016, "step": 3450 }, { "epoch": 0.05, "learning_rate": 9.937484027445756e-05, "loss": 1.018, "step": 3455 }, { "epoch": 0.05, "learning_rate": 9.937297942593132e-05, "loss": 1.0246, "step": 3460 }, { "epoch": 0.05, "learning_rate": 9.937111582949592e-05, "loss": 1.0239, "step": 3465 }, { "epoch": 0.05, "learning_rate": 9.93692494852551e-05, "loss": 1.0493, "step": 3470 }, { "epoch": 0.05, "learning_rate": 9.936738039331273e-05, "loss": 1.0381, "step": 3475 }, { "epoch": 0.05, "learning_rate": 9.936550855377283e-05, "loss": 1.0307, "step": 3480 }, { "epoch": 0.05, "learning_rate": 9.93636339667396e-05, "loss": 1.0244, "step": 3485 }, { "epoch": 0.05, "learning_rate": 9.936175663231735e-05, "loss": 1.0359, "step": 3490 }, { "epoch": 0.05, "learning_rate": 9.935987655061056e-05, "loss": 1.0397, "step": 3495 }, { "epoch": 0.05, "learning_rate": 9.935799372172389e-05, "loss": 1.0403, "step": 3500 }, { "epoch": 0.05, "learning_rate": 9.935610814576211e-05, "loss": 1.045, "step": 3505 }, { "epoch": 0.05, "learning_rate": 9.935421982283018e-05, "loss": 1.0255, "step": 3510 }, { "epoch": 0.05, "learning_rate": 9.93523287530332e-05, "loss": 1.0326, "step": 3515 }, { "epoch": 0.05, "learning_rate": 9.93504349364764e-05, "loss": 1.0198, "step": 3520 }, { "epoch": 0.05, "learning_rate": 9.93485383732652e-05, "loss": 1.074, "step": 3525 }, { "epoch": 0.05, "learning_rate": 9.934663906350514e-05, "loss": 1.0291, "step": 3530 }, { "epoch": 0.05, "learning_rate": 9.934473700730195e-05, "loss": 1.0382, "step": 3535 }, { "epoch": 0.05, "learning_rate": 9.934283220476145e-05, "loss": 1.0634, "step": 3540 }, { "epoch": 0.05, "learning_rate": 9.934092465598972e-05, "loss": 1.0421, "step": 3545 }, { "epoch": 0.05, "learning_rate": 9.933901436109287e-05, "loss": 1.037, "step": 3550 }, { "epoch": 0.05, "learning_rate": 9.933710132017724e-05, "loss": 1.059, "step": 3555 }, { "epoch": 0.05, "learning_rate": 9.93351855333493e-05, "loss": 1.0182, "step": 3560 }, { "epoch": 0.05, "learning_rate": 9.933326700071568e-05, "loss": 1.0437, "step": 3565 }, { "epoch": 0.05, "learning_rate": 9.933134572238315e-05, "loss": 1.0562, "step": 3570 }, { "epoch": 0.05, "learning_rate": 9.932942169845865e-05, "loss": 1.0338, "step": 3575 }, { "epoch": 0.05, "learning_rate": 9.932749492904925e-05, "loss": 1.0489, "step": 3580 }, { "epoch": 0.05, "learning_rate": 9.93255654142622e-05, "loss": 1.0347, "step": 3585 }, { "epoch": 0.05, "learning_rate": 9.932363315420488e-05, "loss": 1.0342, "step": 3590 }, { "epoch": 0.05, "learning_rate": 9.932169814898484e-05, "loss": 1.0256, "step": 3595 }, { "epoch": 0.05, "learning_rate": 9.931976039870978e-05, "loss": 1.0307, "step": 3600 }, { "epoch": 0.05, "learning_rate": 9.931781990348754e-05, "loss": 1.0249, "step": 3605 }, { "epoch": 0.05, "learning_rate": 9.93158766634261e-05, "loss": 1.0531, "step": 3610 }, { "epoch": 0.05, "learning_rate": 9.931393067863366e-05, "loss": 1.042, "step": 3615 }, { "epoch": 0.05, "learning_rate": 9.93119819492185e-05, "loss": 1.0381, "step": 3620 }, { "epoch": 0.05, "learning_rate": 9.931003047528905e-05, "loss": 1.0288, "step": 3625 }, { "epoch": 0.05, "learning_rate": 9.930807625695396e-05, "loss": 1.0612, "step": 3630 }, { "epoch": 0.05, "learning_rate": 9.930611929432198e-05, "loss": 1.0321, "step": 3635 }, { "epoch": 0.05, "learning_rate": 9.930415958750204e-05, "loss": 1.0393, "step": 3640 }, { "epoch": 0.05, "learning_rate": 9.93021971366032e-05, "loss": 1.0426, "step": 3645 }, { "epoch": 0.05, "learning_rate": 9.930023194173467e-05, "loss": 1.0096, "step": 3650 }, { "epoch": 0.05, "learning_rate": 9.929826400300585e-05, "loss": 1.0275, "step": 3655 }, { "epoch": 0.05, "learning_rate": 9.929629332052627e-05, "loss": 1.0136, "step": 3660 }, { "epoch": 0.05, "learning_rate": 9.929431989440559e-05, "loss": 1.0282, "step": 3665 }, { "epoch": 0.06, "learning_rate": 9.929234372475363e-05, "loss": 1.0588, "step": 3670 }, { "epoch": 0.06, "learning_rate": 9.92903648116804e-05, "loss": 1.0388, "step": 3675 }, { "epoch": 0.06, "learning_rate": 9.928838315529605e-05, "loss": 0.9999, "step": 3680 }, { "epoch": 0.06, "learning_rate": 9.928639875571084e-05, "loss": 1.0369, "step": 3685 }, { "epoch": 0.06, "learning_rate": 9.928441161303523e-05, "loss": 1.0244, "step": 3690 }, { "epoch": 0.06, "learning_rate": 9.928242172737983e-05, "loss": 1.0381, "step": 3695 }, { "epoch": 0.06, "learning_rate": 9.928042909885536e-05, "loss": 1.0549, "step": 3700 }, { "epoch": 0.06, "learning_rate": 9.927843372757274e-05, "loss": 1.0206, "step": 3705 }, { "epoch": 0.06, "learning_rate": 9.927643561364303e-05, "loss": 1.0279, "step": 3710 }, { "epoch": 0.06, "learning_rate": 9.927443475717744e-05, "loss": 1.0542, "step": 3715 }, { "epoch": 0.06, "learning_rate": 9.92724311582873e-05, "loss": 1.0265, "step": 3720 }, { "epoch": 0.06, "learning_rate": 9.927042481708416e-05, "loss": 1.042, "step": 3725 }, { "epoch": 0.06, "learning_rate": 9.926841573367964e-05, "loss": 1.0402, "step": 3730 }, { "epoch": 0.06, "learning_rate": 9.92664039081856e-05, "loss": 1.0236, "step": 3735 }, { "epoch": 0.06, "learning_rate": 9.9264389340714e-05, "loss": 1.0478, "step": 3740 }, { "epoch": 0.06, "learning_rate": 9.926237203137695e-05, "loss": 1.0246, "step": 3745 }, { "epoch": 0.06, "learning_rate": 9.926035198028674e-05, "loss": 1.0588, "step": 3750 }, { "epoch": 0.06, "learning_rate": 9.925832918755579e-05, "loss": 1.0236, "step": 3755 }, { "epoch": 0.06, "learning_rate": 9.925630365329668e-05, "loss": 1.0059, "step": 3760 }, { "epoch": 0.06, "learning_rate": 9.925427537762215e-05, "loss": 1.0171, "step": 3765 }, { "epoch": 0.06, "learning_rate": 9.925224436064509e-05, "loss": 1.0181, "step": 3770 }, { "epoch": 0.06, "learning_rate": 9.925021060247852e-05, "loss": 1.0429, "step": 3775 }, { "epoch": 0.06, "learning_rate": 9.924817410323565e-05, "loss": 1.0143, "step": 3780 }, { "epoch": 0.06, "learning_rate": 9.92461348630298e-05, "loss": 1.0238, "step": 3785 }, { "epoch": 0.06, "learning_rate": 9.92440928819745e-05, "loss": 1.0313, "step": 3790 }, { "epoch": 0.06, "learning_rate": 9.924204816018337e-05, "loss": 1.0301, "step": 3795 }, { "epoch": 0.06, "learning_rate": 9.924000069777024e-05, "loss": 1.0439, "step": 3800 }, { "epoch": 0.06, "learning_rate": 9.923795049484903e-05, "loss": 1.0447, "step": 3805 }, { "epoch": 0.06, "learning_rate": 9.923589755153386e-05, "loss": 1.0189, "step": 3810 }, { "epoch": 0.06, "learning_rate": 9.9233841867939e-05, "loss": 1.0186, "step": 3815 }, { "epoch": 0.06, "learning_rate": 9.923178344417885e-05, "loss": 1.025, "step": 3820 }, { "epoch": 0.06, "learning_rate": 9.922972228036797e-05, "loss": 1.0428, "step": 3825 }, { "epoch": 0.06, "learning_rate": 9.922765837662109e-05, "loss": 1.0242, "step": 3830 }, { "epoch": 0.06, "learning_rate": 9.922559173305309e-05, "loss": 1.0263, "step": 3835 }, { "epoch": 0.06, "learning_rate": 9.922352234977895e-05, "loss": 1.0187, "step": 3840 }, { "epoch": 0.06, "learning_rate": 9.922145022691387e-05, "loss": 1.0188, "step": 3845 }, { "epoch": 0.06, "learning_rate": 9.921937536457318e-05, "loss": 1.033, "step": 3850 }, { "epoch": 0.06, "learning_rate": 9.921729776287234e-05, "loss": 1.0425, "step": 3855 }, { "epoch": 0.06, "learning_rate": 9.921521742192701e-05, "loss": 1.039, "step": 3860 }, { "epoch": 0.06, "learning_rate": 9.921313434185294e-05, "loss": 1.0377, "step": 3865 }, { "epoch": 0.06, "learning_rate": 9.921104852276611e-05, "loss": 1.0347, "step": 3870 }, { "epoch": 0.06, "learning_rate": 9.920895996478254e-05, "loss": 1.0368, "step": 3875 }, { "epoch": 0.06, "learning_rate": 9.920686866801855e-05, "loss": 1.0438, "step": 3880 }, { "epoch": 0.06, "learning_rate": 9.920477463259047e-05, "loss": 1.0459, "step": 3885 }, { "epoch": 0.06, "learning_rate": 9.92026778586149e-05, "loss": 1.0096, "step": 3890 }, { "epoch": 0.06, "learning_rate": 9.92005783462085e-05, "loss": 1.0113, "step": 3895 }, { "epoch": 0.06, "learning_rate": 9.919847609548814e-05, "loss": 1.0225, "step": 3900 }, { "epoch": 0.06, "learning_rate": 9.91963711065708e-05, "loss": 1.0327, "step": 3905 }, { "epoch": 0.06, "learning_rate": 9.919426337957364e-05, "loss": 1.0178, "step": 3910 }, { "epoch": 0.06, "learning_rate": 9.919215291461402e-05, "loss": 1.041, "step": 3915 }, { "epoch": 0.06, "learning_rate": 9.919003971180933e-05, "loss": 1.0351, "step": 3920 }, { "epoch": 0.06, "learning_rate": 9.918792377127722e-05, "loss": 1.02, "step": 3925 }, { "epoch": 0.06, "learning_rate": 9.918580509313542e-05, "loss": 1.0036, "step": 3930 }, { "epoch": 0.06, "learning_rate": 9.91836836775019e-05, "loss": 1.0239, "step": 3935 }, { "epoch": 0.06, "learning_rate": 9.91815595244947e-05, "loss": 1.0187, "step": 3940 }, { "epoch": 0.06, "learning_rate": 9.917943263423204e-05, "loss": 1.017, "step": 3945 }, { "epoch": 0.06, "learning_rate": 9.91773030068323e-05, "loss": 1.0306, "step": 3950 }, { "epoch": 0.06, "learning_rate": 9.917517064241402e-05, "loss": 1.0248, "step": 3955 }, { "epoch": 0.06, "learning_rate": 9.917303554109586e-05, "loss": 1.032, "step": 3960 }, { "epoch": 0.06, "learning_rate": 9.917089770299666e-05, "loss": 1.0196, "step": 3965 }, { "epoch": 0.06, "learning_rate": 9.916875712823539e-05, "loss": 1.0362, "step": 3970 }, { "epoch": 0.06, "learning_rate": 9.916661381693121e-05, "loss": 1.0579, "step": 3975 }, { "epoch": 0.06, "learning_rate": 9.916446776920338e-05, "loss": 1.0263, "step": 3980 }, { "epoch": 0.06, "learning_rate": 9.916231898517138e-05, "loss": 1.0384, "step": 3985 }, { "epoch": 0.06, "learning_rate": 9.916016746495475e-05, "loss": 0.9928, "step": 3990 }, { "epoch": 0.06, "learning_rate": 9.915801320867329e-05, "loss": 1.0228, "step": 3995 }, { "epoch": 0.06, "learning_rate": 9.915585621644687e-05, "loss": 1.0456, "step": 4000 }, { "epoch": 0.06, "learning_rate": 9.915369648839554e-05, "loss": 1.0363, "step": 4005 }, { "epoch": 0.06, "learning_rate": 9.91515340246395e-05, "loss": 1.0527, "step": 4010 }, { "epoch": 0.06, "learning_rate": 9.914936882529911e-05, "loss": 1.0294, "step": 4015 }, { "epoch": 0.06, "learning_rate": 9.914720089049489e-05, "loss": 1.0224, "step": 4020 }, { "epoch": 0.06, "learning_rate": 9.914503022034746e-05, "loss": 1.0304, "step": 4025 }, { "epoch": 0.06, "learning_rate": 9.914285681497769e-05, "loss": 1.0309, "step": 4030 }, { "epoch": 0.06, "learning_rate": 9.914068067450648e-05, "loss": 1.0161, "step": 4035 }, { "epoch": 0.06, "learning_rate": 9.913850179905497e-05, "loss": 1.0152, "step": 4040 }, { "epoch": 0.06, "learning_rate": 9.913632018874445e-05, "loss": 1.0141, "step": 4045 }, { "epoch": 0.06, "learning_rate": 9.913413584369631e-05, "loss": 1.0114, "step": 4050 }, { "epoch": 0.06, "learning_rate": 9.913194876403215e-05, "loss": 1.0148, "step": 4055 }, { "epoch": 0.06, "learning_rate": 9.912975894987367e-05, "loss": 1.0016, "step": 4060 }, { "epoch": 0.06, "learning_rate": 9.912756640134275e-05, "loss": 1.0124, "step": 4065 }, { "epoch": 0.06, "learning_rate": 9.912537111856144e-05, "loss": 1.0246, "step": 4070 }, { "epoch": 0.06, "learning_rate": 9.912317310165189e-05, "loss": 1.0334, "step": 4075 }, { "epoch": 0.06, "learning_rate": 9.912097235073645e-05, "loss": 1.0266, "step": 4080 }, { "epoch": 0.06, "learning_rate": 9.91187688659376e-05, "loss": 1.0303, "step": 4085 }, { "epoch": 0.06, "learning_rate": 9.911656264737799e-05, "loss": 1.035, "step": 4090 }, { "epoch": 0.06, "learning_rate": 9.91143536951804e-05, "loss": 0.993, "step": 4095 }, { "epoch": 0.06, "learning_rate": 9.911214200946777e-05, "loss": 1.0221, "step": 4100 }, { "epoch": 0.06, "learning_rate": 9.91099275903632e-05, "loss": 1.0482, "step": 4105 }, { "epoch": 0.06, "learning_rate": 9.910771043798993e-05, "loss": 0.998, "step": 4110 }, { "epoch": 0.06, "learning_rate": 9.910549055247136e-05, "loss": 1.0348, "step": 4115 }, { "epoch": 0.06, "learning_rate": 9.910326793393104e-05, "loss": 1.0149, "step": 4120 }, { "epoch": 0.06, "learning_rate": 9.910104258249269e-05, "loss": 1.0232, "step": 4125 }, { "epoch": 0.06, "learning_rate": 9.909881449828012e-05, "loss": 1.0151, "step": 4130 }, { "epoch": 0.06, "learning_rate": 9.909658368141738e-05, "loss": 1.0208, "step": 4135 }, { "epoch": 0.06, "learning_rate": 9.909435013202862e-05, "loss": 0.9901, "step": 4140 }, { "epoch": 0.06, "learning_rate": 9.909211385023811e-05, "loss": 1.0181, "step": 4145 }, { "epoch": 0.06, "learning_rate": 9.908987483617038e-05, "loss": 1.0159, "step": 4150 }, { "epoch": 0.06, "learning_rate": 9.908763308994999e-05, "loss": 1.0344, "step": 4155 }, { "epoch": 0.06, "learning_rate": 9.908538861170174e-05, "loss": 1.0296, "step": 4160 }, { "epoch": 0.06, "learning_rate": 9.908314140155054e-05, "loss": 1.0217, "step": 4165 }, { "epoch": 0.06, "learning_rate": 9.908089145962145e-05, "loss": 1.0262, "step": 4170 }, { "epoch": 0.06, "learning_rate": 9.907863878603972e-05, "loss": 0.9996, "step": 4175 }, { "epoch": 0.06, "learning_rate": 9.907638338093068e-05, "loss": 1.0244, "step": 4180 }, { "epoch": 0.06, "learning_rate": 9.907412524441989e-05, "loss": 1.0211, "step": 4185 }, { "epoch": 0.06, "learning_rate": 9.907186437663303e-05, "loss": 1.0099, "step": 4190 }, { "epoch": 0.06, "learning_rate": 9.906960077769591e-05, "loss": 1.0306, "step": 4195 }, { "epoch": 0.06, "learning_rate": 9.906733444773454e-05, "loss": 1.0108, "step": 4200 }, { "epoch": 0.06, "learning_rate": 9.906506538687503e-05, "loss": 1.0172, "step": 4205 }, { "epoch": 0.06, "learning_rate": 9.90627935952437e-05, "loss": 1.0563, "step": 4210 }, { "epoch": 0.06, "learning_rate": 9.906051907296695e-05, "loss": 1.0371, "step": 4215 }, { "epoch": 0.06, "learning_rate": 9.905824182017139e-05, "loss": 1.0337, "step": 4220 }, { "epoch": 0.06, "learning_rate": 9.905596183698378e-05, "loss": 1.0049, "step": 4225 }, { "epoch": 0.06, "learning_rate": 9.905367912353099e-05, "loss": 1.0269, "step": 4230 }, { "epoch": 0.06, "learning_rate": 9.905139367994006e-05, "loss": 1.0173, "step": 4235 }, { "epoch": 0.06, "learning_rate": 9.904910550633821e-05, "loss": 1.0291, "step": 4240 }, { "epoch": 0.06, "learning_rate": 9.904681460285278e-05, "loss": 1.0114, "step": 4245 }, { "epoch": 0.06, "learning_rate": 9.90445209696113e-05, "loss": 1.031, "step": 4250 }, { "epoch": 0.06, "learning_rate": 9.904222460674137e-05, "loss": 1.0085, "step": 4255 }, { "epoch": 0.06, "learning_rate": 9.903992551437086e-05, "loss": 1.0409, "step": 4260 }, { "epoch": 0.06, "learning_rate": 9.903762369262768e-05, "loss": 1.0262, "step": 4265 }, { "epoch": 0.06, "learning_rate": 9.903531914163996e-05, "loss": 1.0045, "step": 4270 }, { "epoch": 0.06, "learning_rate": 9.903301186153595e-05, "loss": 1.012, "step": 4275 }, { "epoch": 0.06, "learning_rate": 9.90307018524441e-05, "loss": 1.0324, "step": 4280 }, { "epoch": 0.06, "learning_rate": 9.902838911449292e-05, "loss": 1.0235, "step": 4285 }, { "epoch": 0.06, "learning_rate": 9.902607364781118e-05, "loss": 1.0205, "step": 4290 }, { "epoch": 0.06, "learning_rate": 9.902375545252773e-05, "loss": 1.0069, "step": 4295 }, { "epoch": 0.06, "learning_rate": 9.902143452877158e-05, "loss": 1.0294, "step": 4300 }, { "epoch": 0.06, "learning_rate": 9.901911087667192e-05, "loss": 1.0231, "step": 4305 }, { "epoch": 0.06, "learning_rate": 9.901678449635807e-05, "loss": 1.0244, "step": 4310 }, { "epoch": 0.06, "learning_rate": 9.901445538795949e-05, "loss": 1.043, "step": 4315 }, { "epoch": 0.06, "learning_rate": 9.901212355160586e-05, "loss": 1.0191, "step": 4320 }, { "epoch": 0.06, "learning_rate": 9.900978898742689e-05, "loss": 1.0433, "step": 4325 }, { "epoch": 0.06, "learning_rate": 9.900745169555257e-05, "loss": 1.0266, "step": 4330 }, { "epoch": 0.07, "learning_rate": 9.900511167611296e-05, "loss": 1.0032, "step": 4335 }, { "epoch": 0.07, "learning_rate": 9.90027689292383e-05, "loss": 1.0019, "step": 4340 }, { "epoch": 0.07, "learning_rate": 9.900042345505898e-05, "loss": 1.0156, "step": 4345 }, { "epoch": 0.07, "learning_rate": 9.899807525370555e-05, "loss": 0.9995, "step": 4350 }, { "epoch": 0.07, "learning_rate": 9.899572432530867e-05, "loss": 1.0183, "step": 4355 }, { "epoch": 0.07, "learning_rate": 9.89933706699992e-05, "loss": 1.0166, "step": 4360 }, { "epoch": 0.07, "learning_rate": 9.899101428790817e-05, "loss": 1.0374, "step": 4365 }, { "epoch": 0.07, "learning_rate": 9.898865517916669e-05, "loss": 1.0126, "step": 4370 }, { "epoch": 0.07, "learning_rate": 9.898629334390605e-05, "loss": 1.0392, "step": 4375 }, { "epoch": 0.07, "learning_rate": 9.898392878225772e-05, "loss": 1.0383, "step": 4380 }, { "epoch": 0.07, "learning_rate": 9.898156149435331e-05, "loss": 1.0114, "step": 4385 }, { "epoch": 0.07, "learning_rate": 9.897919148032455e-05, "loss": 1.0147, "step": 4390 }, { "epoch": 0.07, "learning_rate": 9.897681874030337e-05, "loss": 1.0114, "step": 4395 }, { "epoch": 0.07, "learning_rate": 9.89744432744218e-05, "loss": 1.0153, "step": 4400 }, { "epoch": 0.07, "learning_rate": 9.897206508281209e-05, "loss": 1.0164, "step": 4405 }, { "epoch": 0.07, "learning_rate": 9.896968416560655e-05, "loss": 1.0246, "step": 4410 }, { "epoch": 0.07, "learning_rate": 9.896730052293774e-05, "loss": 1.0397, "step": 4415 }, { "epoch": 0.07, "learning_rate": 9.89649141549383e-05, "loss": 1.0202, "step": 4420 }, { "epoch": 0.07, "learning_rate": 9.896252506174104e-05, "loss": 1.0047, "step": 4425 }, { "epoch": 0.07, "learning_rate": 9.896013324347894e-05, "loss": 0.9991, "step": 4430 }, { "epoch": 0.07, "learning_rate": 9.895773870028513e-05, "loss": 1.045, "step": 4435 }, { "epoch": 0.07, "learning_rate": 9.895534143229286e-05, "loss": 1.0063, "step": 4440 }, { "epoch": 0.07, "learning_rate": 9.895294143963554e-05, "loss": 1.0329, "step": 4445 }, { "epoch": 0.07, "learning_rate": 9.89505387224468e-05, "loss": 1.0333, "step": 4450 }, { "epoch": 0.07, "learning_rate": 9.89481332808603e-05, "loss": 1.0404, "step": 4455 }, { "epoch": 0.07, "learning_rate": 9.894572511500997e-05, "loss": 1.024, "step": 4460 }, { "epoch": 0.07, "learning_rate": 9.89433142250298e-05, "loss": 1.0297, "step": 4465 }, { "epoch": 0.07, "learning_rate": 9.894090061105399e-05, "loss": 1.0087, "step": 4470 }, { "epoch": 0.07, "learning_rate": 9.893848427321689e-05, "loss": 1.0175, "step": 4475 }, { "epoch": 0.07, "learning_rate": 9.893606521165293e-05, "loss": 1.0226, "step": 4480 }, { "epoch": 0.07, "learning_rate": 9.893364342649681e-05, "loss": 1.0294, "step": 4485 }, { "epoch": 0.07, "learning_rate": 9.893121891788328e-05, "loss": 0.9943, "step": 4490 }, { "epoch": 0.07, "learning_rate": 9.892879168594728e-05, "loss": 1.0126, "step": 4495 }, { "epoch": 0.07, "learning_rate": 9.892636173082392e-05, "loss": 1.0283, "step": 4500 }, { "epoch": 0.07, "learning_rate": 9.892392905264841e-05, "loss": 1.0183, "step": 4505 }, { "epoch": 0.07, "learning_rate": 9.892149365155618e-05, "loss": 1.0311, "step": 4510 }, { "epoch": 0.07, "learning_rate": 9.891905552768275e-05, "loss": 1.0521, "step": 4515 }, { "epoch": 0.07, "learning_rate": 9.891661468116381e-05, "loss": 1.0293, "step": 4520 }, { "epoch": 0.07, "learning_rate": 9.891417111213524e-05, "loss": 1.0178, "step": 4525 }, { "epoch": 0.07, "learning_rate": 9.8911724820733e-05, "loss": 1.0484, "step": 4530 }, { "epoch": 0.07, "learning_rate": 9.890927580709328e-05, "loss": 1.0162, "step": 4535 }, { "epoch": 0.07, "learning_rate": 9.890682407135237e-05, "loss": 1.043, "step": 4540 }, { "epoch": 0.07, "learning_rate": 9.890436961364671e-05, "loss": 1.0287, "step": 4545 }, { "epoch": 0.07, "learning_rate": 9.89019124341129e-05, "loss": 1.0107, "step": 4550 }, { "epoch": 0.07, "learning_rate": 9.889945253288773e-05, "loss": 1.0158, "step": 4555 }, { "epoch": 0.07, "learning_rate": 9.889698991010809e-05, "loss": 1.0041, "step": 4560 }, { "epoch": 0.07, "learning_rate": 9.889452456591105e-05, "loss": 1.0074, "step": 4565 }, { "epoch": 0.07, "learning_rate": 9.889205650043378e-05, "loss": 1.0298, "step": 4570 }, { "epoch": 0.07, "learning_rate": 9.888958571381371e-05, "loss": 1.0464, "step": 4575 }, { "epoch": 0.07, "learning_rate": 9.888711220618829e-05, "loss": 1.0286, "step": 4580 }, { "epoch": 0.07, "learning_rate": 9.888463597769523e-05, "loss": 1.0229, "step": 4585 }, { "epoch": 0.07, "learning_rate": 9.888215702847232e-05, "loss": 1.0404, "step": 4590 }, { "epoch": 0.07, "learning_rate": 9.887967535865756e-05, "loss": 1.0473, "step": 4595 }, { "epoch": 0.07, "learning_rate": 9.887719096838905e-05, "loss": 1.0221, "step": 4600 }, { "epoch": 0.07, "learning_rate": 9.887470385780504e-05, "loss": 1.024, "step": 4605 }, { "epoch": 0.07, "learning_rate": 9.8872214027044e-05, "loss": 0.9968, "step": 4610 }, { "epoch": 0.07, "learning_rate": 9.886972147624447e-05, "loss": 0.9902, "step": 4615 }, { "epoch": 0.07, "learning_rate": 9.886722620554518e-05, "loss": 1.026, "step": 4620 }, { "epoch": 0.07, "learning_rate": 9.886472821508503e-05, "loss": 1.0079, "step": 4625 }, { "epoch": 0.07, "learning_rate": 9.886222750500303e-05, "loss": 1.0058, "step": 4630 }, { "epoch": 0.07, "learning_rate": 9.885972407543834e-05, "loss": 1.0488, "step": 4635 }, { "epoch": 0.07, "learning_rate": 9.885721792653033e-05, "loss": 1.016, "step": 4640 }, { "epoch": 0.07, "learning_rate": 9.885470905841847e-05, "loss": 1.0406, "step": 4645 }, { "epoch": 0.07, "learning_rate": 9.885219747124237e-05, "loss": 0.9955, "step": 4650 }, { "epoch": 0.07, "learning_rate": 9.884968316514185e-05, "loss": 1.0009, "step": 4655 }, { "epoch": 0.07, "learning_rate": 9.884716614025682e-05, "loss": 1.0264, "step": 4660 }, { "epoch": 0.07, "learning_rate": 9.88446463967274e-05, "loss": 0.9916, "step": 4665 }, { "epoch": 0.07, "learning_rate": 9.884212393469378e-05, "loss": 0.9988, "step": 4670 }, { "epoch": 0.07, "learning_rate": 9.883959875429638e-05, "loss": 0.9965, "step": 4675 }, { "epoch": 0.07, "learning_rate": 9.883707085567575e-05, "loss": 1.002, "step": 4680 }, { "epoch": 0.07, "learning_rate": 9.883454023897259e-05, "loss": 1.0051, "step": 4685 }, { "epoch": 0.07, "learning_rate": 9.88320069043277e-05, "loss": 1.0116, "step": 4690 }, { "epoch": 0.07, "learning_rate": 9.882947085188213e-05, "loss": 1.0262, "step": 4695 }, { "epoch": 0.07, "learning_rate": 9.882693208177697e-05, "loss": 1.0089, "step": 4700 }, { "epoch": 0.07, "learning_rate": 9.882439059415357e-05, "loss": 1.0138, "step": 4705 }, { "epoch": 0.07, "learning_rate": 9.882184638915335e-05, "loss": 0.9924, "step": 4710 }, { "epoch": 0.07, "learning_rate": 9.881929946691791e-05, "loss": 1.0158, "step": 4715 }, { "epoch": 0.07, "learning_rate": 9.881674982758903e-05, "loss": 1.0134, "step": 4720 }, { "epoch": 0.07, "learning_rate": 9.881419747130858e-05, "loss": 1.0323, "step": 4725 }, { "epoch": 0.07, "learning_rate": 9.881164239821863e-05, "loss": 1.0354, "step": 4730 }, { "epoch": 0.07, "learning_rate": 9.880908460846137e-05, "loss": 1.0231, "step": 4735 }, { "epoch": 0.07, "learning_rate": 9.880652410217919e-05, "loss": 1.0, "step": 4740 }, { "epoch": 0.07, "learning_rate": 9.880396087951458e-05, "loss": 1.0069, "step": 4745 }, { "epoch": 0.07, "learning_rate": 9.880139494061018e-05, "loss": 1.0172, "step": 4750 }, { "epoch": 0.07, "learning_rate": 9.879882628560881e-05, "loss": 1.0039, "step": 4755 }, { "epoch": 0.07, "learning_rate": 9.879625491465345e-05, "loss": 0.985, "step": 4760 }, { "epoch": 0.07, "learning_rate": 9.879368082788722e-05, "loss": 1.0212, "step": 4765 }, { "epoch": 0.07, "learning_rate": 9.879110402545334e-05, "loss": 1.0206, "step": 4770 }, { "epoch": 0.07, "learning_rate": 9.878852450749525e-05, "loss": 1.0257, "step": 4775 }, { "epoch": 0.07, "learning_rate": 9.878594227415651e-05, "loss": 1.004, "step": 4780 }, { "epoch": 0.07, "learning_rate": 9.878335732558086e-05, "loss": 1.0186, "step": 4785 }, { "epoch": 0.07, "learning_rate": 9.878076966191214e-05, "loss": 1.0292, "step": 4790 }, { "epoch": 0.07, "learning_rate": 9.87781792832944e-05, "loss": 1.0383, "step": 4795 }, { "epoch": 0.07, "learning_rate": 9.877558618987178e-05, "loss": 1.037, "step": 4800 }, { "epoch": 0.07, "learning_rate": 9.87729903817886e-05, "loss": 1.005, "step": 4805 }, { "epoch": 0.07, "learning_rate": 9.877039185918935e-05, "loss": 1.0354, "step": 4810 }, { "epoch": 0.07, "learning_rate": 9.876779062221864e-05, "loss": 1.0385, "step": 4815 }, { "epoch": 0.07, "learning_rate": 9.876518667102127e-05, "loss": 1.0001, "step": 4820 }, { "epoch": 0.07, "learning_rate": 9.876258000574214e-05, "loss": 1.0177, "step": 4825 }, { "epoch": 0.07, "learning_rate": 9.875997062652633e-05, "loss": 1.0113, "step": 4830 }, { "epoch": 0.07, "learning_rate": 9.875735853351908e-05, "loss": 1.0082, "step": 4835 }, { "epoch": 0.07, "learning_rate": 9.875474372686577e-05, "loss": 1.0193, "step": 4840 }, { "epoch": 0.07, "learning_rate": 9.875212620671193e-05, "loss": 1.0307, "step": 4845 }, { "epoch": 0.07, "learning_rate": 9.874950597320321e-05, "loss": 1.0259, "step": 4850 }, { "epoch": 0.07, "learning_rate": 9.874688302648548e-05, "loss": 1.0235, "step": 4855 }, { "epoch": 0.07, "learning_rate": 9.87442573667047e-05, "loss": 1.0084, "step": 4860 }, { "epoch": 0.07, "learning_rate": 9.874162899400703e-05, "loss": 1.027, "step": 4865 }, { "epoch": 0.07, "learning_rate": 9.873899790853872e-05, "loss": 1.005, "step": 4870 }, { "epoch": 0.07, "learning_rate": 9.873636411044623e-05, "loss": 0.9997, "step": 4875 }, { "epoch": 0.07, "learning_rate": 9.873372759987615e-05, "loss": 0.9947, "step": 4880 }, { "epoch": 0.07, "learning_rate": 9.87310883769752e-05, "loss": 1.0224, "step": 4885 }, { "epoch": 0.07, "learning_rate": 9.872844644189027e-05, "loss": 0.9815, "step": 4890 }, { "epoch": 0.07, "learning_rate": 9.872580179476842e-05, "loss": 1.0096, "step": 4895 }, { "epoch": 0.07, "learning_rate": 9.872315443575684e-05, "loss": 0.9872, "step": 4900 }, { "epoch": 0.07, "learning_rate": 9.872050436500284e-05, "loss": 1.0226, "step": 4905 }, { "epoch": 0.07, "learning_rate": 9.871785158265396e-05, "loss": 1.0006, "step": 4910 }, { "epoch": 0.07, "learning_rate": 9.87151960888578e-05, "loss": 0.9939, "step": 4915 }, { "epoch": 0.07, "learning_rate": 9.871253788376216e-05, "loss": 1.0251, "step": 4920 }, { "epoch": 0.07, "learning_rate": 9.870987696751501e-05, "loss": 0.9992, "step": 4925 }, { "epoch": 0.07, "learning_rate": 9.870721334026443e-05, "loss": 1.0266, "step": 4930 }, { "epoch": 0.07, "learning_rate": 9.870454700215868e-05, "loss": 1.0031, "step": 4935 }, { "epoch": 0.07, "learning_rate": 9.870187795334613e-05, "loss": 1.0045, "step": 4940 }, { "epoch": 0.07, "learning_rate": 9.869920619397537e-05, "loss": 1.0095, "step": 4945 }, { "epoch": 0.07, "learning_rate": 9.869653172419506e-05, "loss": 0.997, "step": 4950 }, { "epoch": 0.07, "learning_rate": 9.869385454415408e-05, "loss": 1.0172, "step": 4955 }, { "epoch": 0.07, "learning_rate": 9.86911746540014e-05, "loss": 1.0056, "step": 4960 }, { "epoch": 0.07, "learning_rate": 9.868849205388622e-05, "loss": 1.0278, "step": 4965 }, { "epoch": 0.07, "learning_rate": 9.868580674395779e-05, "loss": 0.9971, "step": 4970 }, { "epoch": 0.07, "learning_rate": 9.86831187243656e-05, "loss": 1.0189, "step": 4975 }, { "epoch": 0.07, "learning_rate": 9.868042799525924e-05, "loss": 1.0128, "step": 4980 }, { "epoch": 0.07, "learning_rate": 9.867773455678847e-05, "loss": 1.0249, "step": 4985 }, { "epoch": 0.07, "learning_rate": 9.86750384091032e-05, "loss": 1.0054, "step": 4990 }, { "epoch": 0.07, "learning_rate": 9.867233955235347e-05, "loss": 0.9872, "step": 4995 }, { "epoch": 0.07, "learning_rate": 9.86696379866895e-05, "loss": 1.0011, "step": 5000 }, { "epoch": 0.08, "learning_rate": 9.866693371226168e-05, "loss": 1.0219, "step": 5005 }, { "epoch": 0.08, "learning_rate": 9.866422672922047e-05, "loss": 1.0429, "step": 5010 }, { "epoch": 0.08, "learning_rate": 9.866151703771654e-05, "loss": 0.9924, "step": 5015 }, { "epoch": 0.08, "learning_rate": 9.865880463790072e-05, "loss": 1.0033, "step": 5020 }, { "epoch": 0.08, "learning_rate": 9.865608952992396e-05, "loss": 0.9834, "step": 5025 }, { "epoch": 0.08, "learning_rate": 9.865337171393739e-05, "loss": 1.0018, "step": 5030 }, { "epoch": 0.08, "learning_rate": 9.865065119009222e-05, "loss": 1.0008, "step": 5035 }, { "epoch": 0.08, "learning_rate": 9.864792795853992e-05, "loss": 1.0144, "step": 5040 }, { "epoch": 0.08, "learning_rate": 9.864520201943204e-05, "loss": 1.0223, "step": 5045 }, { "epoch": 0.08, "learning_rate": 9.86424733729203e-05, "loss": 1.0226, "step": 5050 }, { "epoch": 0.08, "learning_rate": 9.863974201915653e-05, "loss": 1.0013, "step": 5055 }, { "epoch": 0.08, "learning_rate": 9.863700795829281e-05, "loss": 1.0206, "step": 5060 }, { "epoch": 0.08, "learning_rate": 9.863427119048124e-05, "loss": 1.0249, "step": 5065 }, { "epoch": 0.08, "learning_rate": 9.863153171587417e-05, "loss": 1.0075, "step": 5070 }, { "epoch": 0.08, "learning_rate": 9.862878953462407e-05, "loss": 1.0344, "step": 5075 }, { "epoch": 0.08, "learning_rate": 9.862604464688356e-05, "loss": 1.0058, "step": 5080 }, { "epoch": 0.08, "learning_rate": 9.862329705280542e-05, "loss": 1.0189, "step": 5085 }, { "epoch": 0.08, "learning_rate": 9.862054675254255e-05, "loss": 0.9759, "step": 5090 }, { "epoch": 0.08, "learning_rate": 9.8617793746248e-05, "loss": 1.0014, "step": 5095 }, { "epoch": 0.08, "learning_rate": 9.861503803407505e-05, "loss": 1.0294, "step": 5100 }, { "epoch": 0.08, "learning_rate": 9.861227961617702e-05, "loss": 0.9888, "step": 5105 }, { "epoch": 0.08, "learning_rate": 9.860951849270747e-05, "loss": 1.0285, "step": 5110 }, { "epoch": 0.08, "learning_rate": 9.860675466382007e-05, "loss": 1.0127, "step": 5115 }, { "epoch": 0.08, "learning_rate": 9.860398812966863e-05, "loss": 0.9983, "step": 5120 }, { "epoch": 0.08, "learning_rate": 9.86012188904071e-05, "loss": 1.0242, "step": 5125 }, { "epoch": 0.08, "learning_rate": 9.859844694618965e-05, "loss": 1.0007, "step": 5130 }, { "epoch": 0.08, "learning_rate": 9.859567229717053e-05, "loss": 1.0196, "step": 5135 }, { "epoch": 0.08, "learning_rate": 9.859289494350417e-05, "loss": 1.0219, "step": 5140 }, { "epoch": 0.08, "learning_rate": 9.859011488534516e-05, "loss": 0.9962, "step": 5145 }, { "epoch": 0.08, "learning_rate": 9.858733212284821e-05, "loss": 1.0038, "step": 5150 }, { "epoch": 0.08, "learning_rate": 9.858454665616823e-05, "loss": 1.0082, "step": 5155 }, { "epoch": 0.08, "learning_rate": 9.858175848546019e-05, "loss": 1.014, "step": 5160 }, { "epoch": 0.08, "learning_rate": 9.857896761087932e-05, "loss": 1.0161, "step": 5165 }, { "epoch": 0.08, "learning_rate": 9.857617403258092e-05, "loss": 0.9938, "step": 5170 }, { "epoch": 0.08, "learning_rate": 9.85733777507205e-05, "loss": 0.9934, "step": 5175 }, { "epoch": 0.08, "learning_rate": 9.857057876545365e-05, "loss": 0.9957, "step": 5180 }, { "epoch": 0.08, "learning_rate": 9.85677770769362e-05, "loss": 1.0136, "step": 5185 }, { "epoch": 0.08, "learning_rate": 9.856497268532404e-05, "loss": 1.022, "step": 5190 }, { "epoch": 0.08, "learning_rate": 9.856216559077325e-05, "loss": 0.9977, "step": 5195 }, { "epoch": 0.08, "learning_rate": 9.855935579344011e-05, "loss": 1.0228, "step": 5200 }, { "epoch": 0.08, "learning_rate": 9.855654329348095e-05, "loss": 1.0028, "step": 5205 }, { "epoch": 0.08, "learning_rate": 9.855372809105232e-05, "loss": 0.9794, "step": 5210 }, { "epoch": 0.08, "learning_rate": 9.85509101863109e-05, "loss": 1.0215, "step": 5215 }, { "epoch": 0.08, "learning_rate": 9.854808957941355e-05, "loss": 1.0069, "step": 5220 }, { "epoch": 0.08, "learning_rate": 9.854526627051723e-05, "loss": 1.0092, "step": 5225 }, { "epoch": 0.08, "learning_rate": 9.854244025977907e-05, "loss": 0.998, "step": 5230 }, { "epoch": 0.08, "learning_rate": 9.853961154735636e-05, "loss": 1.0069, "step": 5235 }, { "epoch": 0.08, "learning_rate": 9.853678013340654e-05, "loss": 1.0189, "step": 5240 }, { "epoch": 0.08, "learning_rate": 9.85339460180872e-05, "loss": 1.0165, "step": 5245 }, { "epoch": 0.08, "learning_rate": 9.853110920155608e-05, "loss": 1.0024, "step": 5250 }, { "epoch": 0.08, "learning_rate": 9.852826968397104e-05, "loss": 1.0123, "step": 5255 }, { "epoch": 0.08, "learning_rate": 9.852542746549014e-05, "loss": 0.9887, "step": 5260 }, { "epoch": 0.08, "learning_rate": 9.852258254627155e-05, "loss": 1.0182, "step": 5265 }, { "epoch": 0.08, "learning_rate": 9.851973492647361e-05, "loss": 1.0026, "step": 5270 }, { "epoch": 0.08, "learning_rate": 9.851688460625483e-05, "loss": 1.0063, "step": 5275 }, { "epoch": 0.08, "learning_rate": 9.851403158577383e-05, "loss": 1.0055, "step": 5280 }, { "epoch": 0.08, "learning_rate": 9.851117586518941e-05, "loss": 1.0139, "step": 5285 }, { "epoch": 0.08, "learning_rate": 9.850831744466048e-05, "loss": 1.0115, "step": 5290 }, { "epoch": 0.08, "learning_rate": 9.850545632434615e-05, "loss": 0.979, "step": 5295 }, { "epoch": 0.08, "learning_rate": 9.850259250440567e-05, "loss": 0.9961, "step": 5300 }, { "epoch": 0.08, "learning_rate": 9.84997259849984e-05, "loss": 0.9969, "step": 5305 }, { "epoch": 0.08, "learning_rate": 9.849685676628391e-05, "loss": 1.0048, "step": 5310 }, { "epoch": 0.08, "learning_rate": 9.849398484842186e-05, "loss": 1.0158, "step": 5315 }, { "epoch": 0.08, "learning_rate": 9.849111023157211e-05, "loss": 0.9769, "step": 5320 }, { "epoch": 0.08, "learning_rate": 9.848823291589464e-05, "loss": 1.0191, "step": 5325 }, { "epoch": 0.08, "learning_rate": 9.848535290154959e-05, "loss": 1.0244, "step": 5330 }, { "epoch": 0.08, "learning_rate": 9.848247018869726e-05, "loss": 0.9886, "step": 5335 }, { "epoch": 0.08, "learning_rate": 9.847958477749808e-05, "loss": 0.9983, "step": 5340 }, { "epoch": 0.08, "learning_rate": 9.847669666811265e-05, "loss": 0.994, "step": 5345 }, { "epoch": 0.08, "learning_rate": 9.847380586070171e-05, "loss": 1.0164, "step": 5350 }, { "epoch": 0.08, "learning_rate": 9.847091235542616e-05, "loss": 1.0013, "step": 5355 }, { "epoch": 0.08, "learning_rate": 9.846801615244701e-05, "loss": 1.0035, "step": 5360 }, { "epoch": 0.08, "learning_rate": 9.846511725192548e-05, "loss": 1.0235, "step": 5365 }, { "epoch": 0.08, "learning_rate": 9.84622156540229e-05, "loss": 0.9952, "step": 5370 }, { "epoch": 0.08, "learning_rate": 9.845931135890078e-05, "loss": 1.0199, "step": 5375 }, { "epoch": 0.08, "learning_rate": 9.845640436672072e-05, "loss": 1.0265, "step": 5380 }, { "epoch": 0.08, "learning_rate": 9.845349467764455e-05, "loss": 0.9787, "step": 5385 }, { "epoch": 0.08, "learning_rate": 9.84505822918342e-05, "loss": 1.0517, "step": 5390 }, { "epoch": 0.08, "learning_rate": 9.844766720945178e-05, "loss": 1.0284, "step": 5395 }, { "epoch": 0.08, "learning_rate": 9.844474943065949e-05, "loss": 1.0281, "step": 5400 }, { "epoch": 0.08, "learning_rate": 9.844182895561975e-05, "loss": 0.9826, "step": 5405 }, { "epoch": 0.08, "learning_rate": 9.84389057844951e-05, "loss": 0.9898, "step": 5410 }, { "epoch": 0.08, "learning_rate": 9.843597991744824e-05, "loss": 1.0067, "step": 5415 }, { "epoch": 0.08, "learning_rate": 9.8433051354642e-05, "loss": 0.9714, "step": 5420 }, { "epoch": 0.08, "learning_rate": 9.843012009623938e-05, "loss": 1.0234, "step": 5425 }, { "epoch": 0.08, "learning_rate": 9.842718614240352e-05, "loss": 1.0013, "step": 5430 }, { "epoch": 0.08, "learning_rate": 9.84242494932977e-05, "loss": 0.9941, "step": 5435 }, { "epoch": 0.08, "learning_rate": 9.84213101490854e-05, "loss": 0.9922, "step": 5440 }, { "epoch": 0.08, "learning_rate": 9.841836810993014e-05, "loss": 1.0055, "step": 5445 }, { "epoch": 0.08, "learning_rate": 9.841542337599575e-05, "loss": 1.0119, "step": 5450 }, { "epoch": 0.08, "learning_rate": 9.841247594744607e-05, "loss": 1.0063, "step": 5455 }, { "epoch": 0.08, "learning_rate": 9.840952582444516e-05, "loss": 1.0268, "step": 5460 }, { "epoch": 0.08, "learning_rate": 9.84065730071572e-05, "loss": 0.9858, "step": 5465 }, { "epoch": 0.08, "learning_rate": 9.840361749574654e-05, "loss": 1.0093, "step": 5470 }, { "epoch": 0.08, "learning_rate": 9.840065929037767e-05, "loss": 1.0085, "step": 5475 }, { "epoch": 0.08, "learning_rate": 9.839769839121523e-05, "loss": 0.9942, "step": 5480 }, { "epoch": 0.08, "learning_rate": 9.839473479842403e-05, "loss": 1.0054, "step": 5485 }, { "epoch": 0.08, "learning_rate": 9.839176851216898e-05, "loss": 0.9933, "step": 5490 }, { "epoch": 0.08, "learning_rate": 9.83887995326152e-05, "loss": 1.007, "step": 5495 }, { "epoch": 0.08, "learning_rate": 9.838582785992791e-05, "loss": 0.9992, "step": 5500 }, { "epoch": 0.08, "learning_rate": 9.838285349427254e-05, "loss": 1.0131, "step": 5505 }, { "epoch": 0.08, "learning_rate": 9.837987643581459e-05, "loss": 0.9786, "step": 5510 }, { "epoch": 0.08, "learning_rate": 9.837689668471977e-05, "loss": 1.0429, "step": 5515 }, { "epoch": 0.08, "learning_rate": 9.837391424115391e-05, "loss": 0.9994, "step": 5520 }, { "epoch": 0.08, "learning_rate": 9.837092910528302e-05, "loss": 1.0039, "step": 5525 }, { "epoch": 0.08, "learning_rate": 9.836794127727322e-05, "loss": 1.0176, "step": 5530 }, { "epoch": 0.08, "learning_rate": 9.836495075729082e-05, "loss": 1.0025, "step": 5535 }, { "epoch": 0.08, "learning_rate": 9.836195754550224e-05, "loss": 0.989, "step": 5540 }, { "epoch": 0.08, "learning_rate": 9.83589616420741e-05, "loss": 0.9905, "step": 5545 }, { "epoch": 0.08, "learning_rate": 9.835596304717311e-05, "loss": 1.0082, "step": 5550 }, { "epoch": 0.08, "learning_rate": 9.835296176096619e-05, "loss": 1.0127, "step": 5555 }, { "epoch": 0.08, "learning_rate": 9.834995778362035e-05, "loss": 1.0205, "step": 5560 }, { "epoch": 0.08, "learning_rate": 9.83469511153028e-05, "loss": 1.0091, "step": 5565 }, { "epoch": 0.08, "learning_rate": 9.834394175618087e-05, "loss": 1.0116, "step": 5570 }, { "epoch": 0.08, "learning_rate": 9.834092970642206e-05, "loss": 0.9858, "step": 5575 }, { "epoch": 0.08, "learning_rate": 9.8337914966194e-05, "loss": 0.9736, "step": 5580 }, { "epoch": 0.08, "learning_rate": 9.833489753566447e-05, "loss": 0.9988, "step": 5585 }, { "epoch": 0.08, "learning_rate": 9.833187741500143e-05, "loss": 1.0078, "step": 5590 }, { "epoch": 0.08, "learning_rate": 9.832885460437294e-05, "loss": 1.0005, "step": 5595 }, { "epoch": 0.08, "learning_rate": 9.832582910394727e-05, "loss": 0.9749, "step": 5600 }, { "epoch": 0.08, "learning_rate": 9.832280091389278e-05, "loss": 0.9844, "step": 5605 }, { "epoch": 0.08, "learning_rate": 9.831977003437805e-05, "loss": 1.0053, "step": 5610 }, { "epoch": 0.08, "learning_rate": 9.83167364655717e-05, "loss": 1.007, "step": 5615 }, { "epoch": 0.08, "learning_rate": 9.831370020764262e-05, "loss": 1.0207, "step": 5620 }, { "epoch": 0.08, "learning_rate": 9.831066126075978e-05, "loss": 1.02, "step": 5625 }, { "epoch": 0.08, "learning_rate": 9.830761962509232e-05, "loss": 0.9541, "step": 5630 }, { "epoch": 0.08, "learning_rate": 9.830457530080953e-05, "loss": 1.0114, "step": 5635 }, { "epoch": 0.08, "learning_rate": 9.830152828808081e-05, "loss": 1.0036, "step": 5640 }, { "epoch": 0.08, "learning_rate": 9.829847858707579e-05, "loss": 1.0051, "step": 5645 }, { "epoch": 0.08, "learning_rate": 9.829542619796418e-05, "loss": 1.0022, "step": 5650 }, { "epoch": 0.08, "learning_rate": 9.829237112091587e-05, "loss": 1.0073, "step": 5655 }, { "epoch": 0.08, "learning_rate": 9.82893133561009e-05, "loss": 1.0177, "step": 5660 }, { "epoch": 0.08, "learning_rate": 9.828625290368945e-05, "loss": 1.0251, "step": 5665 }, { "epoch": 0.09, "learning_rate": 9.828318976385184e-05, "loss": 0.9796, "step": 5670 }, { "epoch": 0.09, "learning_rate": 9.828012393675858e-05, "loss": 1.0177, "step": 5675 }, { "epoch": 0.09, "learning_rate": 9.827705542258027e-05, "loss": 0.9967, "step": 5680 }, { "epoch": 0.09, "learning_rate": 9.82739842214877e-05, "loss": 1.0075, "step": 5685 }, { "epoch": 0.09, "learning_rate": 9.827091033365184e-05, "loss": 0.9963, "step": 5690 }, { "epoch": 0.09, "learning_rate": 9.82678337592437e-05, "loss": 1.007, "step": 5695 }, { "epoch": 0.09, "learning_rate": 9.826475449843458e-05, "loss": 0.9951, "step": 5700 }, { "epoch": 0.09, "learning_rate": 9.826167255139583e-05, "loss": 1.0151, "step": 5705 }, { "epoch": 0.09, "learning_rate": 9.825858791829894e-05, "loss": 1.0226, "step": 5710 }, { "epoch": 0.09, "learning_rate": 9.825550059931566e-05, "loss": 0.9919, "step": 5715 }, { "epoch": 0.09, "learning_rate": 9.825241059461779e-05, "loss": 1.0015, "step": 5720 }, { "epoch": 0.09, "learning_rate": 9.824931790437728e-05, "loss": 1.0027, "step": 5725 }, { "epoch": 0.09, "learning_rate": 9.82462225287663e-05, "loss": 0.9823, "step": 5730 }, { "epoch": 0.09, "learning_rate": 9.824312446795709e-05, "loss": 1.0101, "step": 5735 }, { "epoch": 0.09, "learning_rate": 9.824002372212211e-05, "loss": 0.9975, "step": 5740 }, { "epoch": 0.09, "learning_rate": 9.823692029143389e-05, "loss": 1.0027, "step": 5745 }, { "epoch": 0.09, "learning_rate": 9.823381417606522e-05, "loss": 1.0028, "step": 5750 }, { "epoch": 0.09, "learning_rate": 9.823070537618892e-05, "loss": 0.98, "step": 5755 }, { "epoch": 0.09, "learning_rate": 9.822759389197803e-05, "loss": 0.9898, "step": 5760 }, { "epoch": 0.09, "learning_rate": 9.822447972360573e-05, "loss": 1.0055, "step": 5765 }, { "epoch": 0.09, "learning_rate": 9.822136287124532e-05, "loss": 0.9914, "step": 5770 }, { "epoch": 0.09, "learning_rate": 9.821824333507032e-05, "loss": 1.0195, "step": 5775 }, { "epoch": 0.09, "learning_rate": 9.82151211152543e-05, "loss": 0.9807, "step": 5780 }, { "epoch": 0.09, "learning_rate": 9.821199621197106e-05, "loss": 0.9945, "step": 5785 }, { "epoch": 0.09, "learning_rate": 9.820886862539449e-05, "loss": 0.9956, "step": 5790 }, { "epoch": 0.09, "learning_rate": 9.820573835569871e-05, "loss": 0.987, "step": 5795 }, { "epoch": 0.09, "learning_rate": 9.820260540305789e-05, "loss": 1.0201, "step": 5800 }, { "epoch": 0.09, "learning_rate": 9.819946976764643e-05, "loss": 1.0241, "step": 5805 }, { "epoch": 0.09, "learning_rate": 9.819633144963882e-05, "loss": 0.9898, "step": 5810 }, { "epoch": 0.09, "learning_rate": 9.819319044920975e-05, "loss": 1.0122, "step": 5815 }, { "epoch": 0.09, "learning_rate": 9.819004676653403e-05, "loss": 1.0039, "step": 5820 }, { "epoch": 0.09, "learning_rate": 9.818690040178662e-05, "loss": 0.993, "step": 5825 }, { "epoch": 0.09, "learning_rate": 9.818375135514263e-05, "loss": 0.9893, "step": 5830 }, { "epoch": 0.09, "learning_rate": 9.818059962677733e-05, "loss": 1.0264, "step": 5835 }, { "epoch": 0.09, "learning_rate": 9.817744521686615e-05, "loss": 1.0002, "step": 5840 }, { "epoch": 0.09, "learning_rate": 9.817428812558463e-05, "loss": 1.001, "step": 5845 }, { "epoch": 0.09, "learning_rate": 9.817112835310848e-05, "loss": 1.0225, "step": 5850 }, { "epoch": 0.09, "learning_rate": 9.816796589961357e-05, "loss": 0.9798, "step": 5855 }, { "epoch": 0.09, "learning_rate": 9.816480076527591e-05, "loss": 1.0157, "step": 5860 }, { "epoch": 0.09, "learning_rate": 9.816163295027166e-05, "loss": 0.9794, "step": 5865 }, { "epoch": 0.09, "learning_rate": 9.815846245477713e-05, "loss": 0.9827, "step": 5870 }, { "epoch": 0.09, "learning_rate": 9.815528927896875e-05, "loss": 0.9822, "step": 5875 }, { "epoch": 0.09, "learning_rate": 9.815211342302318e-05, "loss": 0.9808, "step": 5880 }, { "epoch": 0.09, "learning_rate": 9.814893488711711e-05, "loss": 1.0056, "step": 5885 }, { "epoch": 0.09, "learning_rate": 9.814575367142752e-05, "loss": 0.9871, "step": 5890 }, { "epoch": 0.09, "learning_rate": 9.814256977613141e-05, "loss": 0.9832, "step": 5895 }, { "epoch": 0.09, "learning_rate": 9.8139383201406e-05, "loss": 0.9785, "step": 5900 }, { "epoch": 0.09, "learning_rate": 9.813619394742863e-05, "loss": 0.9894, "step": 5905 }, { "epoch": 0.09, "learning_rate": 9.813300201437681e-05, "loss": 0.9834, "step": 5910 }, { "epoch": 0.09, "learning_rate": 9.812980740242821e-05, "loss": 1.0137, "step": 5915 }, { "epoch": 0.09, "learning_rate": 9.812661011176061e-05, "loss": 0.9897, "step": 5920 }, { "epoch": 0.09, "learning_rate": 9.812341014255196e-05, "loss": 0.9831, "step": 5925 }, { "epoch": 0.09, "learning_rate": 9.812020749498035e-05, "loss": 0.986, "step": 5930 }, { "epoch": 0.09, "learning_rate": 9.811700216922404e-05, "loss": 1.0034, "step": 5935 }, { "epoch": 0.09, "learning_rate": 9.811379416546142e-05, "loss": 0.9941, "step": 5940 }, { "epoch": 0.09, "learning_rate": 9.811058348387104e-05, "loss": 0.9948, "step": 5945 }, { "epoch": 0.09, "learning_rate": 9.810737012463159e-05, "loss": 0.9763, "step": 5950 }, { "epoch": 0.09, "learning_rate": 9.810415408792191e-05, "loss": 0.9992, "step": 5955 }, { "epoch": 0.09, "learning_rate": 9.810093537392102e-05, "loss": 0.9902, "step": 5960 }, { "epoch": 0.09, "learning_rate": 9.809771398280801e-05, "loss": 1.0157, "step": 5965 }, { "epoch": 0.09, "learning_rate": 9.809448991476223e-05, "loss": 0.9983, "step": 5970 }, { "epoch": 0.09, "learning_rate": 9.809126316996306e-05, "loss": 0.9886, "step": 5975 }, { "epoch": 0.09, "learning_rate": 9.808803374859013e-05, "loss": 1.0036, "step": 5980 }, { "epoch": 0.09, "learning_rate": 9.808480165082315e-05, "loss": 0.9929, "step": 5985 }, { "epoch": 0.09, "learning_rate": 9.808156687684203e-05, "loss": 0.995, "step": 5990 }, { "epoch": 0.09, "learning_rate": 9.807832942682678e-05, "loss": 0.9984, "step": 5995 }, { "epoch": 0.09, "learning_rate": 9.807508930095762e-05, "loss": 0.9916, "step": 6000 }, { "epoch": 0.09, "learning_rate": 9.807184649941484e-05, "loss": 0.9806, "step": 6005 }, { "epoch": 0.09, "learning_rate": 9.806860102237893e-05, "loss": 1.0016, "step": 6010 }, { "epoch": 0.09, "learning_rate": 9.806535287003056e-05, "loss": 0.9928, "step": 6015 }, { "epoch": 0.09, "learning_rate": 9.806210204255049e-05, "loss": 0.9908, "step": 6020 }, { "epoch": 0.09, "learning_rate": 9.805884854011961e-05, "loss": 1.0187, "step": 6025 }, { "epoch": 0.09, "learning_rate": 9.805559236291905e-05, "loss": 1.0207, "step": 6030 }, { "epoch": 0.09, "learning_rate": 9.805233351113001e-05, "loss": 1.018, "step": 6035 }, { "epoch": 0.09, "learning_rate": 9.804907198493387e-05, "loss": 1.0018, "step": 6040 }, { "epoch": 0.09, "learning_rate": 9.804580778451216e-05, "loss": 1.0002, "step": 6045 }, { "epoch": 0.09, "learning_rate": 9.804254091004654e-05, "loss": 0.9915, "step": 6050 }, { "epoch": 0.09, "learning_rate": 9.803927136171882e-05, "loss": 0.9864, "step": 6055 }, { "epoch": 0.09, "learning_rate": 9.803599913971102e-05, "loss": 0.9868, "step": 6060 }, { "epoch": 0.09, "learning_rate": 9.803272424420522e-05, "loss": 0.9765, "step": 6065 }, { "epoch": 0.09, "learning_rate": 9.802944667538367e-05, "loss": 0.9849, "step": 6070 }, { "epoch": 0.09, "learning_rate": 9.802616643342883e-05, "loss": 0.9995, "step": 6075 }, { "epoch": 0.09, "learning_rate": 9.802288351852327e-05, "loss": 1.0014, "step": 6080 }, { "epoch": 0.09, "learning_rate": 9.801959793084965e-05, "loss": 0.9843, "step": 6085 }, { "epoch": 0.09, "learning_rate": 9.801630967059088e-05, "loss": 0.9966, "step": 6090 }, { "epoch": 0.09, "learning_rate": 9.801301873792996e-05, "loss": 1.0073, "step": 6095 }, { "epoch": 0.09, "learning_rate": 9.800972513305004e-05, "loss": 0.9898, "step": 6100 }, { "epoch": 0.09, "learning_rate": 9.800642885613445e-05, "loss": 0.9612, "step": 6105 }, { "epoch": 0.09, "learning_rate": 9.800312990736662e-05, "loss": 0.9875, "step": 6110 }, { "epoch": 0.09, "learning_rate": 9.799982828693018e-05, "loss": 0.9866, "step": 6115 }, { "epoch": 0.09, "learning_rate": 9.799652399500888e-05, "loss": 0.997, "step": 6120 }, { "epoch": 0.09, "learning_rate": 9.799321703178662e-05, "loss": 0.9899, "step": 6125 }, { "epoch": 0.09, "learning_rate": 9.798990739744745e-05, "loss": 0.9936, "step": 6130 }, { "epoch": 0.09, "learning_rate": 9.798659509217558e-05, "loss": 0.9946, "step": 6135 }, { "epoch": 0.09, "learning_rate": 9.798328011615537e-05, "loss": 1.0053, "step": 6140 }, { "epoch": 0.09, "learning_rate": 9.797996246957129e-05, "loss": 1.003, "step": 6145 }, { "epoch": 0.09, "learning_rate": 9.7976642152608e-05, "loss": 0.9941, "step": 6150 }, { "epoch": 0.09, "learning_rate": 9.79733191654503e-05, "loss": 1.0025, "step": 6155 }, { "epoch": 0.09, "learning_rate": 9.796999350828313e-05, "loss": 1.0076, "step": 6160 }, { "epoch": 0.09, "learning_rate": 9.796666518129159e-05, "loss": 0.9901, "step": 6165 }, { "epoch": 0.09, "learning_rate": 9.796333418466093e-05, "loss": 0.9935, "step": 6170 }, { "epoch": 0.09, "learning_rate": 9.79600005185765e-05, "loss": 0.9912, "step": 6175 }, { "epoch": 0.09, "learning_rate": 9.795666418322389e-05, "loss": 0.9885, "step": 6180 }, { "epoch": 0.09, "learning_rate": 9.795332517878875e-05, "loss": 1.0082, "step": 6185 }, { "epoch": 0.09, "learning_rate": 9.794998350545695e-05, "loss": 1.0098, "step": 6190 }, { "epoch": 0.09, "learning_rate": 9.794663916341444e-05, "loss": 0.9909, "step": 6195 }, { "epoch": 0.09, "learning_rate": 9.794329215284737e-05, "loss": 1.0052, "step": 6200 }, { "epoch": 0.09, "learning_rate": 9.793994247394203e-05, "loss": 0.9842, "step": 6205 }, { "epoch": 0.09, "learning_rate": 9.793659012688484e-05, "loss": 1.0, "step": 6210 }, { "epoch": 0.09, "learning_rate": 9.793323511186236e-05, "loss": 0.9759, "step": 6215 }, { "epoch": 0.09, "learning_rate": 9.792987742906135e-05, "loss": 0.998, "step": 6220 }, { "epoch": 0.09, "learning_rate": 9.792651707866868e-05, "loss": 0.9797, "step": 6225 }, { "epoch": 0.09, "learning_rate": 9.792315406087136e-05, "loss": 0.9986, "step": 6230 }, { "epoch": 0.09, "learning_rate": 9.791978837585656e-05, "loss": 0.9933, "step": 6235 }, { "epoch": 0.09, "learning_rate": 9.791642002381162e-05, "loss": 1.0121, "step": 6240 }, { "epoch": 0.09, "learning_rate": 9.7913049004924e-05, "loss": 0.9898, "step": 6245 }, { "epoch": 0.09, "learning_rate": 9.79096753193813e-05, "loss": 0.9809, "step": 6250 }, { "epoch": 0.09, "learning_rate": 9.790629896737131e-05, "loss": 0.973, "step": 6255 }, { "epoch": 0.09, "learning_rate": 9.790291994908194e-05, "loss": 0.9995, "step": 6260 }, { "epoch": 0.09, "learning_rate": 9.789953826470126e-05, "loss": 0.9859, "step": 6265 }, { "epoch": 0.09, "learning_rate": 9.789615391441747e-05, "loss": 0.9951, "step": 6270 }, { "epoch": 0.09, "learning_rate": 9.789276689841893e-05, "loss": 0.9928, "step": 6275 }, { "epoch": 0.09, "learning_rate": 9.788937721689415e-05, "loss": 1.0111, "step": 6280 }, { "epoch": 0.09, "learning_rate": 9.788598487003178e-05, "loss": 1.0081, "step": 6285 }, { "epoch": 0.09, "learning_rate": 9.788258985802063e-05, "loss": 1.0066, "step": 6290 }, { "epoch": 0.09, "learning_rate": 9.787919218104967e-05, "loss": 1.0142, "step": 6295 }, { "epoch": 0.09, "learning_rate": 9.787579183930798e-05, "loss": 1.0087, "step": 6300 }, { "epoch": 0.09, "learning_rate": 9.787238883298481e-05, "loss": 1.0296, "step": 6305 }, { "epoch": 0.09, "learning_rate": 9.786898316226958e-05, "loss": 0.9934, "step": 6310 }, { "epoch": 0.09, "learning_rate": 9.786557482735181e-05, "loss": 0.9947, "step": 6315 }, { "epoch": 0.09, "learning_rate": 9.786216382842121e-05, "loss": 0.9978, "step": 6320 }, { "epoch": 0.09, "learning_rate": 9.785875016566763e-05, "loss": 0.9819, "step": 6325 }, { "epoch": 0.09, "learning_rate": 9.785533383928104e-05, "loss": 0.9495, "step": 6330 }, { "epoch": 0.1, "learning_rate": 9.785191484945159e-05, "loss": 0.9861, "step": 6335 }, { "epoch": 0.1, "learning_rate": 9.784849319636956e-05, "loss": 1.002, "step": 6340 }, { "epoch": 0.1, "learning_rate": 9.78450688802254e-05, "loss": 0.9988, "step": 6345 }, { "epoch": 0.1, "learning_rate": 9.784164190120969e-05, "loss": 0.9798, "step": 6350 }, { "epoch": 0.1, "learning_rate": 9.783821225951317e-05, "loss": 0.9867, "step": 6355 }, { "epoch": 0.1, "learning_rate": 9.783477995532668e-05, "loss": 1.0098, "step": 6360 }, { "epoch": 0.1, "learning_rate": 9.78313449888413e-05, "loss": 0.9737, "step": 6365 }, { "epoch": 0.1, "learning_rate": 9.78279073602482e-05, "loss": 0.9876, "step": 6370 }, { "epoch": 0.1, "learning_rate": 9.782446706973867e-05, "loss": 0.9913, "step": 6375 }, { "epoch": 0.1, "learning_rate": 9.782102411750422e-05, "loss": 0.9967, "step": 6380 }, { "epoch": 0.1, "learning_rate": 9.781757850373645e-05, "loss": 0.9733, "step": 6385 }, { "epoch": 0.1, "learning_rate": 9.781413022862713e-05, "loss": 0.9987, "step": 6390 }, { "epoch": 0.1, "learning_rate": 9.781067929236818e-05, "loss": 1.0015, "step": 6395 }, { "epoch": 0.1, "learning_rate": 9.780722569515169e-05, "loss": 0.9803, "step": 6400 }, { "epoch": 0.1, "learning_rate": 9.780376943716983e-05, "loss": 1.0017, "step": 6405 }, { "epoch": 0.1, "learning_rate": 9.7800310518615e-05, "loss": 0.9818, "step": 6410 }, { "epoch": 0.1, "learning_rate": 9.779684893967969e-05, "loss": 1.0209, "step": 6415 }, { "epoch": 0.1, "learning_rate": 9.779338470055657e-05, "loss": 0.9855, "step": 6420 }, { "epoch": 0.1, "learning_rate": 9.778991780143843e-05, "loss": 0.9911, "step": 6425 }, { "epoch": 0.1, "learning_rate": 9.778644824251823e-05, "loss": 0.9717, "step": 6430 }, { "epoch": 0.1, "learning_rate": 9.77829760239891e-05, "loss": 0.9782, "step": 6435 }, { "epoch": 0.1, "learning_rate": 9.777950114604425e-05, "loss": 0.9935, "step": 6440 }, { "epoch": 0.1, "learning_rate": 9.777602360887708e-05, "loss": 0.975, "step": 6445 }, { "epoch": 0.1, "learning_rate": 9.777254341268117e-05, "loss": 1.0148, "step": 6450 }, { "epoch": 0.1, "learning_rate": 9.77690605576502e-05, "loss": 0.9783, "step": 6455 }, { "epoch": 0.1, "learning_rate": 9.776557504397798e-05, "loss": 0.9843, "step": 6460 }, { "epoch": 0.1, "learning_rate": 9.776208687185855e-05, "loss": 1.0154, "step": 6465 }, { "epoch": 0.1, "learning_rate": 9.775859604148603e-05, "loss": 0.975, "step": 6470 }, { "epoch": 0.1, "learning_rate": 9.775510255305469e-05, "loss": 0.9713, "step": 6475 }, { "epoch": 0.1, "learning_rate": 9.775160640675897e-05, "loss": 0.9988, "step": 6480 }, { "epoch": 0.1, "learning_rate": 9.774810760279347e-05, "loss": 0.9714, "step": 6485 }, { "epoch": 0.1, "learning_rate": 9.774460614135291e-05, "loss": 0.9786, "step": 6490 }, { "epoch": 0.1, "learning_rate": 9.774110202263216e-05, "loss": 0.958, "step": 6495 }, { "epoch": 0.1, "learning_rate": 9.773759524682624e-05, "loss": 0.9818, "step": 6500 }, { "epoch": 0.1, "learning_rate": 9.773408581413035e-05, "loss": 0.9946, "step": 6505 }, { "epoch": 0.1, "learning_rate": 9.77305737247398e-05, "loss": 0.9607, "step": 6510 }, { "epoch": 0.1, "learning_rate": 9.772705897885004e-05, "loss": 0.9825, "step": 6515 }, { "epoch": 0.1, "learning_rate": 9.77235415766567e-05, "loss": 0.992, "step": 6520 }, { "epoch": 0.1, "learning_rate": 9.772002151835556e-05, "loss": 0.971, "step": 6525 }, { "epoch": 0.1, "learning_rate": 9.771649880414253e-05, "loss": 0.9895, "step": 6530 }, { "epoch": 0.1, "learning_rate": 9.771297343421364e-05, "loss": 1.0047, "step": 6535 }, { "epoch": 0.1, "learning_rate": 9.770944540876514e-05, "loss": 1.0101, "step": 6540 }, { "epoch": 0.1, "learning_rate": 9.770591472799336e-05, "loss": 1.0015, "step": 6545 }, { "epoch": 0.1, "learning_rate": 9.77023813920948e-05, "loss": 0.9921, "step": 6550 }, { "epoch": 0.1, "learning_rate": 9.769884540126612e-05, "loss": 1.0014, "step": 6555 }, { "epoch": 0.1, "learning_rate": 9.769530675570414e-05, "loss": 0.9793, "step": 6560 }, { "epoch": 0.1, "learning_rate": 9.769176545560577e-05, "loss": 0.977, "step": 6565 }, { "epoch": 0.1, "learning_rate": 9.768822150116812e-05, "loss": 0.9944, "step": 6570 }, { "epoch": 0.1, "learning_rate": 9.768467489258845e-05, "loss": 0.9616, "step": 6575 }, { "epoch": 0.1, "learning_rate": 9.768112563006413e-05, "loss": 1.0, "step": 6580 }, { "epoch": 0.1, "learning_rate": 9.767757371379271e-05, "loss": 0.9754, "step": 6585 }, { "epoch": 0.1, "learning_rate": 9.767401914397188e-05, "loss": 0.9987, "step": 6590 }, { "epoch": 0.1, "learning_rate": 9.767046192079945e-05, "loss": 0.9941, "step": 6595 }, { "epoch": 0.1, "learning_rate": 9.766690204447341e-05, "loss": 0.9792, "step": 6600 }, { "epoch": 0.1, "learning_rate": 9.766333951519191e-05, "loss": 0.9683, "step": 6605 }, { "epoch": 0.1, "learning_rate": 9.765977433315322e-05, "loss": 0.9728, "step": 6610 }, { "epoch": 0.1, "learning_rate": 9.765620649855574e-05, "loss": 0.9733, "step": 6615 }, { "epoch": 0.1, "learning_rate": 9.765263601159806e-05, "loss": 0.9817, "step": 6620 }, { "epoch": 0.1, "learning_rate": 9.76490628724789e-05, "loss": 1.0034, "step": 6625 }, { "epoch": 0.1, "learning_rate": 9.764548708139713e-05, "loss": 0.9813, "step": 6630 }, { "epoch": 0.1, "learning_rate": 9.764190863855174e-05, "loss": 1.0126, "step": 6635 }, { "epoch": 0.1, "learning_rate": 9.763832754414193e-05, "loss": 0.9735, "step": 6640 }, { "epoch": 0.1, "learning_rate": 9.7634743798367e-05, "loss": 0.9732, "step": 6645 }, { "epoch": 0.1, "learning_rate": 9.763115740142637e-05, "loss": 1.0076, "step": 6650 }, { "epoch": 0.1, "learning_rate": 9.762756835351969e-05, "loss": 0.9997, "step": 6655 }, { "epoch": 0.1, "learning_rate": 9.762397665484669e-05, "loss": 0.9721, "step": 6660 }, { "epoch": 0.1, "learning_rate": 9.762038230560729e-05, "loss": 0.9893, "step": 6665 }, { "epoch": 0.1, "learning_rate": 9.761678530600151e-05, "loss": 1.0168, "step": 6670 }, { "epoch": 0.1, "learning_rate": 9.761318565622957e-05, "loss": 1.0101, "step": 6675 }, { "epoch": 0.1, "learning_rate": 9.76095833564918e-05, "loss": 0.9739, "step": 6680 }, { "epoch": 0.1, "learning_rate": 9.760597840698867e-05, "loss": 1.0068, "step": 6685 }, { "epoch": 0.1, "learning_rate": 9.760237080792086e-05, "loss": 0.9614, "step": 6690 }, { "epoch": 0.1, "learning_rate": 9.759876055948914e-05, "loss": 0.9766, "step": 6695 }, { "epoch": 0.1, "learning_rate": 9.759514766189442e-05, "loss": 0.9994, "step": 6700 }, { "epoch": 0.1, "learning_rate": 9.75915321153378e-05, "loss": 0.9983, "step": 6705 }, { "epoch": 0.1, "learning_rate": 9.758791392002051e-05, "loss": 1.0202, "step": 6710 }, { "epoch": 0.1, "learning_rate": 9.758429307614391e-05, "loss": 0.9702, "step": 6715 }, { "epoch": 0.1, "learning_rate": 9.758066958390954e-05, "loss": 0.958, "step": 6720 }, { "epoch": 0.1, "learning_rate": 9.757704344351905e-05, "loss": 0.9803, "step": 6725 }, { "epoch": 0.1, "learning_rate": 9.757341465517429e-05, "loss": 1.0088, "step": 6730 }, { "epoch": 0.1, "learning_rate": 9.756978321907719e-05, "loss": 0.9869, "step": 6735 }, { "epoch": 0.1, "learning_rate": 9.756614913542986e-05, "loss": 0.9948, "step": 6740 }, { "epoch": 0.1, "learning_rate": 9.75625124044346e-05, "loss": 0.9939, "step": 6745 }, { "epoch": 0.1, "learning_rate": 9.755887302629376e-05, "loss": 0.984, "step": 6750 }, { "epoch": 0.1, "learning_rate": 9.755523100120994e-05, "loss": 0.9798, "step": 6755 }, { "epoch": 0.1, "learning_rate": 9.755158632938582e-05, "loss": 0.9922, "step": 6760 }, { "epoch": 0.1, "learning_rate": 9.754793901102423e-05, "loss": 1.0063, "step": 6765 }, { "epoch": 0.1, "learning_rate": 9.754428904632821e-05, "loss": 1.0019, "step": 6770 }, { "epoch": 0.1, "learning_rate": 9.754063643550089e-05, "loss": 0.9868, "step": 6775 }, { "epoch": 0.1, "learning_rate": 9.753698117874553e-05, "loss": 0.9846, "step": 6780 }, { "epoch": 0.1, "learning_rate": 9.753332327626561e-05, "loss": 0.9733, "step": 6785 }, { "epoch": 0.1, "learning_rate": 9.752966272826468e-05, "loss": 0.9766, "step": 6790 }, { "epoch": 0.1, "learning_rate": 9.752599953494648e-05, "loss": 1.0055, "step": 6795 }, { "epoch": 0.1, "learning_rate": 9.752233369651488e-05, "loss": 0.984, "step": 6800 }, { "epoch": 0.1, "learning_rate": 9.751866521317396e-05, "loss": 0.9934, "step": 6805 }, { "epoch": 0.1, "learning_rate": 9.751499408512783e-05, "loss": 1.0231, "step": 6810 }, { "epoch": 0.1, "learning_rate": 9.751132031258083e-05, "loss": 1.0027, "step": 6815 }, { "epoch": 0.1, "learning_rate": 9.750764389573744e-05, "loss": 0.9578, "step": 6820 }, { "epoch": 0.1, "learning_rate": 9.750396483480228e-05, "loss": 0.9834, "step": 6825 }, { "epoch": 0.1, "learning_rate": 9.750028312998008e-05, "loss": 0.9806, "step": 6830 }, { "epoch": 0.1, "learning_rate": 9.74965987814758e-05, "loss": 0.9781, "step": 6835 }, { "epoch": 0.1, "learning_rate": 9.749291178949442e-05, "loss": 0.9829, "step": 6840 }, { "epoch": 0.1, "learning_rate": 9.748922215424123e-05, "loss": 0.9897, "step": 6845 }, { "epoch": 0.1, "learning_rate": 9.748552987592152e-05, "loss": 0.9756, "step": 6850 }, { "epoch": 0.1, "learning_rate": 9.748183495474082e-05, "loss": 0.9925, "step": 6855 }, { "epoch": 0.1, "learning_rate": 9.747813739090474e-05, "loss": 1.0026, "step": 6860 }, { "epoch": 0.1, "learning_rate": 9.747443718461912e-05, "loss": 1.0023, "step": 6865 }, { "epoch": 0.1, "learning_rate": 9.747073433608987e-05, "loss": 0.9768, "step": 6870 }, { "epoch": 0.1, "learning_rate": 9.746702884552308e-05, "loss": 1.0093, "step": 6875 }, { "epoch": 0.1, "learning_rate": 9.746332071312499e-05, "loss": 0.983, "step": 6880 }, { "epoch": 0.1, "learning_rate": 9.745960993910197e-05, "loss": 0.9744, "step": 6885 }, { "epoch": 0.1, "learning_rate": 9.745589652366055e-05, "loss": 0.9726, "step": 6890 }, { "epoch": 0.1, "learning_rate": 9.745218046700741e-05, "loss": 1.0026, "step": 6895 }, { "epoch": 0.1, "learning_rate": 9.744846176934937e-05, "loss": 0.9892, "step": 6900 }, { "epoch": 0.1, "learning_rate": 9.74447404308934e-05, "loss": 1.0007, "step": 6905 }, { "epoch": 0.1, "learning_rate": 9.74410164518466e-05, "loss": 0.9891, "step": 6910 }, { "epoch": 0.1, "learning_rate": 9.743728983241624e-05, "loss": 0.9746, "step": 6915 }, { "epoch": 0.1, "learning_rate": 9.743356057280974e-05, "loss": 0.9981, "step": 6920 }, { "epoch": 0.1, "learning_rate": 9.742982867323465e-05, "loss": 0.9834, "step": 6925 }, { "epoch": 0.1, "learning_rate": 9.742609413389869e-05, "loss": 0.9782, "step": 6930 }, { "epoch": 0.1, "learning_rate": 9.742235695500966e-05, "loss": 0.9815, "step": 6935 }, { "epoch": 0.1, "learning_rate": 9.74186171367756e-05, "loss": 0.9764, "step": 6940 }, { "epoch": 0.1, "learning_rate": 9.741487467940467e-05, "loss": 0.9917, "step": 6945 }, { "epoch": 0.1, "learning_rate": 9.74111295831051e-05, "loss": 0.9984, "step": 6950 }, { "epoch": 0.1, "learning_rate": 9.740738184808538e-05, "loss": 1.0031, "step": 6955 }, { "epoch": 0.1, "learning_rate": 9.740363147455407e-05, "loss": 0.9895, "step": 6960 }, { "epoch": 0.1, "learning_rate": 9.739987846271989e-05, "loss": 1.0132, "step": 6965 }, { "epoch": 0.1, "learning_rate": 9.739612281279176e-05, "loss": 0.969, "step": 6970 }, { "epoch": 0.1, "learning_rate": 9.739236452497868e-05, "loss": 0.9642, "step": 6975 }, { "epoch": 0.1, "learning_rate": 9.73886035994898e-05, "loss": 0.9881, "step": 6980 }, { "epoch": 0.1, "learning_rate": 9.738484003653447e-05, "loss": 0.9949, "step": 6985 }, { "epoch": 0.1, "learning_rate": 9.738107383632216e-05, "loss": 1.0018, "step": 6990 }, { "epoch": 0.1, "learning_rate": 9.737730499906246e-05, "loss": 1.0003, "step": 6995 }, { "epoch": 0.1, "learning_rate": 9.737353352496514e-05, "loss": 1.0053, "step": 7000 }, { "epoch": 0.11, "learning_rate": 9.736975941424009e-05, "loss": 0.9527, "step": 7005 }, { "epoch": 0.11, "learning_rate": 9.736598266709738e-05, "loss": 1.0055, "step": 7010 }, { "epoch": 0.11, "learning_rate": 9.736220328374721e-05, "loss": 0.9555, "step": 7015 }, { "epoch": 0.11, "learning_rate": 9.735842126439991e-05, "loss": 0.9639, "step": 7020 }, { "epoch": 0.11, "learning_rate": 9.7354636609266e-05, "loss": 0.9723, "step": 7025 }, { "epoch": 0.11, "learning_rate": 9.73508493185561e-05, "loss": 0.9897, "step": 7030 }, { "epoch": 0.11, "learning_rate": 9.734705939248099e-05, "loss": 1.0018, "step": 7035 }, { "epoch": 0.11, "learning_rate": 9.73432668312516e-05, "loss": 1.0115, "step": 7040 }, { "epoch": 0.11, "learning_rate": 9.733947163507904e-05, "loss": 0.9955, "step": 7045 }, { "epoch": 0.11, "learning_rate": 9.73356738041745e-05, "loss": 1.0043, "step": 7050 }, { "epoch": 0.11, "learning_rate": 9.733187333874938e-05, "loss": 0.9778, "step": 7055 }, { "epoch": 0.11, "learning_rate": 9.732807023901518e-05, "loss": 0.9893, "step": 7060 }, { "epoch": 0.11, "learning_rate": 9.732426450518358e-05, "loss": 0.9748, "step": 7065 }, { "epoch": 0.11, "learning_rate": 9.732045613746636e-05, "loss": 0.9987, "step": 7070 }, { "epoch": 0.11, "learning_rate": 9.731664513607553e-05, "loss": 0.9709, "step": 7075 }, { "epoch": 0.11, "learning_rate": 9.731283150122317e-05, "loss": 1.0094, "step": 7080 }, { "epoch": 0.11, "learning_rate": 9.730901523312153e-05, "loss": 0.9807, "step": 7085 }, { "epoch": 0.11, "learning_rate": 9.730519633198301e-05, "loss": 0.9811, "step": 7090 }, { "epoch": 0.11, "learning_rate": 9.730137479802016e-05, "loss": 0.9665, "step": 7095 }, { "epoch": 0.11, "learning_rate": 9.729755063144567e-05, "loss": 0.9783, "step": 7100 }, { "epoch": 0.11, "learning_rate": 9.729372383247237e-05, "loss": 0.9925, "step": 7105 }, { "epoch": 0.11, "learning_rate": 9.728989440131325e-05, "loss": 0.9896, "step": 7110 }, { "epoch": 0.11, "learning_rate": 9.728606233818145e-05, "loss": 0.9805, "step": 7115 }, { "epoch": 0.11, "learning_rate": 9.728222764329024e-05, "loss": 1.0015, "step": 7120 }, { "epoch": 0.11, "learning_rate": 9.727839031685304e-05, "loss": 0.9984, "step": 7125 }, { "epoch": 0.11, "learning_rate": 9.727455035908341e-05, "loss": 0.9872, "step": 7130 }, { "epoch": 0.11, "learning_rate": 9.727070777019511e-05, "loss": 0.9826, "step": 7135 }, { "epoch": 0.11, "learning_rate": 9.726686255040197e-05, "loss": 0.97, "step": 7140 }, { "epoch": 0.11, "learning_rate": 9.726301469991801e-05, "loss": 0.9797, "step": 7145 }, { "epoch": 0.11, "learning_rate": 9.725916421895737e-05, "loss": 0.9842, "step": 7150 }, { "epoch": 0.11, "learning_rate": 9.725531110773439e-05, "loss": 0.9916, "step": 7155 }, { "epoch": 0.11, "learning_rate": 9.725145536646347e-05, "loss": 0.9879, "step": 7160 }, { "epoch": 0.11, "learning_rate": 9.724759699535923e-05, "loss": 0.9864, "step": 7165 }, { "epoch": 0.11, "learning_rate": 9.724373599463643e-05, "loss": 0.9838, "step": 7170 }, { "epoch": 0.11, "learning_rate": 9.723987236450993e-05, "loss": 0.9949, "step": 7175 }, { "epoch": 0.11, "learning_rate": 9.723600610519479e-05, "loss": 0.9803, "step": 7180 }, { "epoch": 0.11, "learning_rate": 9.723213721690617e-05, "loss": 0.9708, "step": 7185 }, { "epoch": 0.11, "learning_rate": 9.722826569985938e-05, "loss": 0.9805, "step": 7190 }, { "epoch": 0.11, "learning_rate": 9.722439155426995e-05, "loss": 0.9894, "step": 7195 }, { "epoch": 0.11, "learning_rate": 9.722051478035346e-05, "loss": 0.992, "step": 7200 }, { "epoch": 0.11, "learning_rate": 9.721663537832568e-05, "loss": 0.9799, "step": 7205 }, { "epoch": 0.11, "learning_rate": 9.721275334840254e-05, "loss": 0.9767, "step": 7210 }, { "epoch": 0.11, "learning_rate": 9.720886869080007e-05, "loss": 0.9634, "step": 7215 }, { "epoch": 0.11, "learning_rate": 9.72049814057345e-05, "loss": 0.9789, "step": 7220 }, { "epoch": 0.11, "learning_rate": 9.720109149342215e-05, "loss": 0.9879, "step": 7225 }, { "epoch": 0.11, "learning_rate": 9.719719895407957e-05, "loss": 0.981, "step": 7230 }, { "epoch": 0.11, "learning_rate": 9.719330378792337e-05, "loss": 0.9957, "step": 7235 }, { "epoch": 0.11, "learning_rate": 9.718940599517032e-05, "loss": 0.992, "step": 7240 }, { "epoch": 0.11, "learning_rate": 9.71855055760374e-05, "loss": 0.9884, "step": 7245 }, { "epoch": 0.11, "learning_rate": 9.718160253074167e-05, "loss": 0.9833, "step": 7250 }, { "epoch": 0.11, "learning_rate": 9.717769685950034e-05, "loss": 0.9955, "step": 7255 }, { "epoch": 0.11, "learning_rate": 9.717378856253082e-05, "loss": 1.0198, "step": 7260 }, { "epoch": 0.11, "learning_rate": 9.71698776400506e-05, "loss": 0.9938, "step": 7265 }, { "epoch": 0.11, "learning_rate": 9.716596409227739e-05, "loss": 0.9907, "step": 7270 }, { "epoch": 0.11, "learning_rate": 9.716204791942895e-05, "loss": 0.9931, "step": 7275 }, { "epoch": 0.11, "learning_rate": 9.715812912172329e-05, "loss": 0.9966, "step": 7280 }, { "epoch": 0.11, "learning_rate": 9.715420769937846e-05, "loss": 1.0072, "step": 7285 }, { "epoch": 0.11, "learning_rate": 9.715028365261277e-05, "loss": 0.9757, "step": 7290 }, { "epoch": 0.11, "learning_rate": 9.714635698164456e-05, "loss": 0.9915, "step": 7295 }, { "epoch": 0.11, "learning_rate": 9.71424276866924e-05, "loss": 0.9902, "step": 7300 }, { "epoch": 0.11, "learning_rate": 9.7138495767975e-05, "loss": 1.0135, "step": 7305 }, { "epoch": 0.11, "learning_rate": 9.713456122571116e-05, "loss": 0.9719, "step": 7310 }, { "epoch": 0.11, "learning_rate": 9.713062406011988e-05, "loss": 0.9919, "step": 7315 }, { "epoch": 0.11, "learning_rate": 9.712668427142029e-05, "loss": 0.9705, "step": 7320 }, { "epoch": 0.11, "learning_rate": 9.712274185983166e-05, "loss": 0.9812, "step": 7325 }, { "epoch": 0.11, "learning_rate": 9.711879682557339e-05, "loss": 0.985, "step": 7330 }, { "epoch": 0.11, "learning_rate": 9.711484916886509e-05, "loss": 0.9515, "step": 7335 }, { "epoch": 0.11, "learning_rate": 9.711089888992641e-05, "loss": 0.9844, "step": 7340 }, { "epoch": 0.11, "learning_rate": 9.710694598897727e-05, "loss": 0.9866, "step": 7345 }, { "epoch": 0.11, "learning_rate": 9.710299046623763e-05, "loss": 0.9968, "step": 7350 }, { "epoch": 0.11, "learning_rate": 9.709903232192765e-05, "loss": 0.981, "step": 7355 }, { "epoch": 0.11, "learning_rate": 9.709507155626764e-05, "loss": 0.9702, "step": 7360 }, { "epoch": 0.11, "learning_rate": 9.709110816947802e-05, "loss": 0.9841, "step": 7365 }, { "epoch": 0.11, "learning_rate": 9.708714216177939e-05, "loss": 0.9659, "step": 7370 }, { "epoch": 0.11, "learning_rate": 9.70831735333925e-05, "loss": 0.966, "step": 7375 }, { "epoch": 0.11, "learning_rate": 9.707920228453817e-05, "loss": 0.9837, "step": 7380 }, { "epoch": 0.11, "learning_rate": 9.707522841543749e-05, "loss": 0.9441, "step": 7385 }, { "epoch": 0.11, "learning_rate": 9.707125192631158e-05, "loss": 0.9751, "step": 7390 }, { "epoch": 0.11, "learning_rate": 9.706727281738179e-05, "loss": 0.987, "step": 7395 }, { "epoch": 0.11, "learning_rate": 9.706329108886956e-05, "loss": 0.9857, "step": 7400 }, { "epoch": 0.11, "learning_rate": 9.705930674099653e-05, "loss": 0.9712, "step": 7405 }, { "epoch": 0.11, "learning_rate": 9.70553197739844e-05, "loss": 0.9983, "step": 7410 }, { "epoch": 0.11, "learning_rate": 9.705133018805512e-05, "loss": 0.9923, "step": 7415 }, { "epoch": 0.11, "learning_rate": 9.70473379834307e-05, "loss": 1.0078, "step": 7420 }, { "epoch": 0.11, "learning_rate": 9.704334316033337e-05, "loss": 0.9589, "step": 7425 }, { "epoch": 0.11, "learning_rate": 9.703934571898543e-05, "loss": 0.9685, "step": 7430 }, { "epoch": 0.11, "learning_rate": 9.703534565960936e-05, "loss": 1.0048, "step": 7435 }, { "epoch": 0.11, "learning_rate": 9.70313429824278e-05, "loss": 0.9676, "step": 7440 }, { "epoch": 0.11, "learning_rate": 9.702733768766355e-05, "loss": 0.9672, "step": 7445 }, { "epoch": 0.11, "learning_rate": 9.70233297755395e-05, "loss": 0.9796, "step": 7450 }, { "epoch": 0.11, "learning_rate": 9.701931924627869e-05, "loss": 0.9818, "step": 7455 }, { "epoch": 0.11, "learning_rate": 9.701530610010439e-05, "loss": 0.9846, "step": 7460 }, { "epoch": 0.11, "learning_rate": 9.701129033723991e-05, "loss": 0.9739, "step": 7465 }, { "epoch": 0.11, "learning_rate": 9.700727195790878e-05, "loss": 0.9791, "step": 7470 }, { "epoch": 0.11, "learning_rate": 9.700325096233462e-05, "loss": 0.9495, "step": 7475 }, { "epoch": 0.11, "learning_rate": 9.699922735074124e-05, "loss": 0.9623, "step": 7480 }, { "epoch": 0.11, "learning_rate": 9.699520112335261e-05, "loss": 0.9835, "step": 7485 }, { "epoch": 0.11, "learning_rate": 9.699117228039274e-05, "loss": 0.983, "step": 7490 }, { "epoch": 0.11, "learning_rate": 9.698714082208594e-05, "loss": 1.0087, "step": 7495 }, { "epoch": 0.11, "learning_rate": 9.698310674865652e-05, "loss": 0.9621, "step": 7500 }, { "epoch": 0.11, "learning_rate": 9.697907006032902e-05, "loss": 0.9848, "step": 7505 }, { "epoch": 0.11, "learning_rate": 9.697503075732813e-05, "loss": 0.9801, "step": 7510 }, { "epoch": 0.11, "learning_rate": 9.697098883987865e-05, "loss": 1.0025, "step": 7515 }, { "epoch": 0.11, "learning_rate": 9.696694430820552e-05, "loss": 0.9754, "step": 7520 }, { "epoch": 0.11, "learning_rate": 9.696289716253386e-05, "loss": 1.0039, "step": 7525 }, { "epoch": 0.11, "learning_rate": 9.695884740308892e-05, "loss": 0.9996, "step": 7530 }, { "epoch": 0.11, "learning_rate": 9.695479503009609e-05, "loss": 0.9788, "step": 7535 }, { "epoch": 0.11, "learning_rate": 9.69507400437809e-05, "loss": 0.9741, "step": 7540 }, { "epoch": 0.11, "learning_rate": 9.694668244436906e-05, "loss": 0.989, "step": 7545 }, { "epoch": 0.11, "learning_rate": 9.694262223208637e-05, "loss": 0.9806, "step": 7550 }, { "epoch": 0.11, "learning_rate": 9.693855940715882e-05, "loss": 0.9935, "step": 7555 }, { "epoch": 0.11, "learning_rate": 9.693449396981253e-05, "loss": 0.9514, "step": 7560 }, { "epoch": 0.11, "learning_rate": 9.693042592027376e-05, "loss": 0.9881, "step": 7565 }, { "epoch": 0.11, "learning_rate": 9.692635525876894e-05, "loss": 0.979, "step": 7570 }, { "epoch": 0.11, "learning_rate": 9.692228198552462e-05, "loss": 0.9721, "step": 7575 }, { "epoch": 0.11, "learning_rate": 9.691820610076748e-05, "loss": 0.9409, "step": 7580 }, { "epoch": 0.11, "learning_rate": 9.691412760472441e-05, "loss": 0.9737, "step": 7585 }, { "epoch": 0.11, "learning_rate": 9.691004649762237e-05, "loss": 0.9765, "step": 7590 }, { "epoch": 0.11, "learning_rate": 9.690596277968849e-05, "loss": 0.9971, "step": 7595 }, { "epoch": 0.11, "learning_rate": 9.69018764511501e-05, "loss": 0.9796, "step": 7600 }, { "epoch": 0.11, "learning_rate": 9.689778751223459e-05, "loss": 0.9688, "step": 7605 }, { "epoch": 0.11, "learning_rate": 9.689369596316954e-05, "loss": 0.9516, "step": 7610 }, { "epoch": 0.11, "learning_rate": 9.688960180418269e-05, "loss": 0.9822, "step": 7615 }, { "epoch": 0.11, "learning_rate": 9.688550503550188e-05, "loss": 0.972, "step": 7620 }, { "epoch": 0.11, "learning_rate": 9.688140565735513e-05, "loss": 0.9863, "step": 7625 }, { "epoch": 0.11, "learning_rate": 9.687730366997061e-05, "loss": 0.9868, "step": 7630 }, { "epoch": 0.11, "learning_rate": 9.68731990735766e-05, "loss": 0.9655, "step": 7635 }, { "epoch": 0.11, "learning_rate": 9.686909186840157e-05, "loss": 0.998, "step": 7640 }, { "epoch": 0.11, "learning_rate": 9.686498205467407e-05, "loss": 1.0014, "step": 7645 }, { "epoch": 0.11, "learning_rate": 9.686086963262286e-05, "loss": 0.95, "step": 7650 }, { "epoch": 0.11, "learning_rate": 9.685675460247684e-05, "loss": 1.0098, "step": 7655 }, { "epoch": 0.11, "learning_rate": 9.685263696446502e-05, "loss": 0.953, "step": 7660 }, { "epoch": 0.11, "learning_rate": 9.684851671881656e-05, "loss": 0.9916, "step": 7665 }, { "epoch": 0.12, "learning_rate": 9.684439386576079e-05, "loss": 0.9786, "step": 7670 }, { "epoch": 0.12, "learning_rate": 9.684026840552718e-05, "loss": 0.9837, "step": 7675 }, { "epoch": 0.12, "learning_rate": 9.683614033834532e-05, "loss": 0.9752, "step": 7680 }, { "epoch": 0.12, "learning_rate": 9.683200966444497e-05, "loss": 0.9654, "step": 7685 }, { "epoch": 0.12, "learning_rate": 9.682787638405602e-05, "loss": 0.997, "step": 7690 }, { "epoch": 0.12, "learning_rate": 9.682374049740853e-05, "loss": 0.98, "step": 7695 }, { "epoch": 0.12, "learning_rate": 9.681960200473268e-05, "loss": 0.9661, "step": 7700 }, { "epoch": 0.12, "learning_rate": 9.68154609062588e-05, "loss": 0.9783, "step": 7705 }, { "epoch": 0.12, "learning_rate": 9.681131720221734e-05, "loss": 0.9682, "step": 7710 }, { "epoch": 0.12, "learning_rate": 9.680717089283897e-05, "loss": 0.9607, "step": 7715 }, { "epoch": 0.12, "learning_rate": 9.680302197835444e-05, "loss": 0.9975, "step": 7720 }, { "epoch": 0.12, "learning_rate": 9.679887045899466e-05, "loss": 0.9641, "step": 7725 }, { "epoch": 0.12, "learning_rate": 9.679471633499068e-05, "loss": 0.9951, "step": 7730 }, { "epoch": 0.12, "learning_rate": 9.679055960657371e-05, "loss": 0.9815, "step": 7735 }, { "epoch": 0.12, "learning_rate": 9.67864002739751e-05, "loss": 1.002, "step": 7740 }, { "epoch": 0.12, "learning_rate": 9.678223833742634e-05, "loss": 0.9816, "step": 7745 }, { "epoch": 0.12, "learning_rate": 9.677807379715907e-05, "loss": 1.0019, "step": 7750 }, { "epoch": 0.12, "learning_rate": 9.677390665340507e-05, "loss": 1.001, "step": 7755 }, { "epoch": 0.12, "learning_rate": 9.676973690639626e-05, "loss": 0.967, "step": 7760 }, { "epoch": 0.12, "learning_rate": 9.676556455636472e-05, "loss": 0.9691, "step": 7765 }, { "epoch": 0.12, "learning_rate": 9.676138960354267e-05, "loss": 0.9879, "step": 7770 }, { "epoch": 0.12, "learning_rate": 9.675721204816246e-05, "loss": 0.9975, "step": 7775 }, { "epoch": 0.12, "learning_rate": 9.67530318904566e-05, "loss": 0.985, "step": 7780 }, { "epoch": 0.12, "learning_rate": 9.674884913065774e-05, "loss": 0.9828, "step": 7785 }, { "epoch": 0.12, "learning_rate": 9.674466376899869e-05, "loss": 0.9837, "step": 7790 }, { "epoch": 0.12, "learning_rate": 9.674047580571239e-05, "loss": 0.9776, "step": 7795 }, { "epoch": 0.12, "learning_rate": 9.673628524103192e-05, "loss": 0.9805, "step": 7800 }, { "epoch": 0.12, "learning_rate": 9.673209207519048e-05, "loss": 0.9979, "step": 7805 }, { "epoch": 0.12, "learning_rate": 9.67278963084215e-05, "loss": 0.9803, "step": 7810 }, { "epoch": 0.12, "learning_rate": 9.672369794095846e-05, "loss": 0.9786, "step": 7815 }, { "epoch": 0.12, "learning_rate": 9.671949697303504e-05, "loss": 0.9614, "step": 7820 }, { "epoch": 0.12, "learning_rate": 9.671529340488505e-05, "loss": 0.9918, "step": 7825 }, { "epoch": 0.12, "learning_rate": 9.671108723674245e-05, "loss": 1.0101, "step": 7830 }, { "epoch": 0.12, "learning_rate": 9.670687846884132e-05, "loss": 0.9961, "step": 7835 }, { "epoch": 0.12, "learning_rate": 9.670266710141593e-05, "loss": 0.9959, "step": 7840 }, { "epoch": 0.12, "learning_rate": 9.669845313470065e-05, "loss": 0.986, "step": 7845 }, { "epoch": 0.12, "learning_rate": 9.669423656893002e-05, "loss": 0.9985, "step": 7850 }, { "epoch": 0.12, "learning_rate": 9.66900174043387e-05, "loss": 0.9962, "step": 7855 }, { "epoch": 0.12, "learning_rate": 9.668579564116153e-05, "loss": 0.9837, "step": 7860 }, { "epoch": 0.12, "learning_rate": 9.668157127963348e-05, "loss": 0.9705, "step": 7865 }, { "epoch": 0.12, "learning_rate": 9.667734431998967e-05, "loss": 0.9659, "step": 7870 }, { "epoch": 0.12, "learning_rate": 9.667311476246532e-05, "loss": 0.97, "step": 7875 }, { "epoch": 0.12, "learning_rate": 9.666888260729586e-05, "loss": 0.9655, "step": 7880 }, { "epoch": 0.12, "learning_rate": 9.666464785471682e-05, "loss": 0.9548, "step": 7885 }, { "epoch": 0.12, "learning_rate": 9.666041050496392e-05, "loss": 0.9999, "step": 7890 }, { "epoch": 0.12, "learning_rate": 9.665617055827297e-05, "loss": 0.9691, "step": 7895 }, { "epoch": 0.12, "learning_rate": 9.665192801487993e-05, "loss": 0.976, "step": 7900 }, { "epoch": 0.12, "learning_rate": 9.664768287502096e-05, "loss": 1.0013, "step": 7905 }, { "epoch": 0.12, "learning_rate": 9.664343513893231e-05, "loss": 0.9877, "step": 7910 }, { "epoch": 0.12, "learning_rate": 9.66391848068504e-05, "loss": 0.9598, "step": 7915 }, { "epoch": 0.12, "learning_rate": 9.663493187901178e-05, "loss": 1.0041, "step": 7920 }, { "epoch": 0.12, "learning_rate": 9.663067635565317e-05, "loss": 0.9729, "step": 7925 }, { "epoch": 0.12, "learning_rate": 9.662641823701137e-05, "loss": 0.9876, "step": 7930 }, { "epoch": 0.12, "learning_rate": 9.662215752332342e-05, "loss": 0.9843, "step": 7935 }, { "epoch": 0.12, "learning_rate": 9.661789421482643e-05, "loss": 0.9753, "step": 7940 }, { "epoch": 0.12, "learning_rate": 9.661362831175769e-05, "loss": 0.9572, "step": 7945 }, { "epoch": 0.12, "learning_rate": 9.660935981435462e-05, "loss": 0.9711, "step": 7950 }, { "epoch": 0.12, "learning_rate": 9.660508872285479e-05, "loss": 0.9875, "step": 7955 }, { "epoch": 0.12, "learning_rate": 9.66008150374959e-05, "loss": 0.9961, "step": 7960 }, { "epoch": 0.12, "learning_rate": 9.659653875851582e-05, "loss": 0.9995, "step": 7965 }, { "epoch": 0.12, "learning_rate": 9.659225988615254e-05, "loss": 0.9804, "step": 7970 }, { "epoch": 0.12, "learning_rate": 9.658797842064422e-05, "loss": 0.9638, "step": 7975 }, { "epoch": 0.12, "learning_rate": 9.658369436222914e-05, "loss": 0.9982, "step": 7980 }, { "epoch": 0.12, "learning_rate": 9.657940771114575e-05, "loss": 0.9639, "step": 7985 }, { "epoch": 0.12, "learning_rate": 9.65751184676326e-05, "loss": 0.975, "step": 7990 }, { "epoch": 0.12, "learning_rate": 9.657082663192843e-05, "loss": 0.9932, "step": 7995 }, { "epoch": 0.12, "learning_rate": 9.65665322042721e-05, "loss": 0.9915, "step": 8000 } ], "logging_steps": 5, "max_steps": 66683, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 2.122051858150195e+19, "trial_name": null, "trial_params": null }