diff --git "a/tmp-checkpoint-15416/trainer_state.json" "b/tmp-checkpoint-15416/trainer_state.json" new file mode 100644--- /dev/null +++ "b/tmp-checkpoint-15416/trainer_state.json" @@ -0,0 +1,9427 @@ +{ + "best_metric": 0.9808588624000549, + "best_model_checkpoint": "swinv2-base-patch4-window12-192-22k-ConcreteClassifier-PVT\\checkpoint-1927", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 15416, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.7298045320878742e-06, + "loss": 2.3066, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 3.4596090641757485e-06, + "loss": 2.131, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 5.1894135962636225e-06, + "loss": 2.0104, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 6.919218128351497e-06, + "loss": 1.8938, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 8.649022660439371e-06, + "loss": 1.7743, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 1.0378827192527245e-05, + "loss": 1.73, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 1.2108631724615119e-05, + "loss": 1.6552, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 1.3838436256702994e-05, + "loss": 1.4119, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 1.5568240788790867e-05, + "loss": 1.1716, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 1.7298045320878743e-05, + "loss": 1.2431, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 1.9027849852966615e-05, + "loss": 1.1115, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 2.075765438505449e-05, + "loss": 1.0365, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 2.2487458917142365e-05, + "loss": 1.2179, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 2.4217263449230237e-05, + "loss": 1.1192, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 2.5947067981318112e-05, + "loss": 0.8697, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 2.7676872513405988e-05, + "loss": 1.0286, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 2.940667704549386e-05, + "loss": 0.9186, + "step": 170 + }, + { + "epoch": 0.09, + "learning_rate": 3.1136481577581735e-05, + "loss": 1.0909, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 3.286628610966961e-05, + "loss": 1.0519, + "step": 190 + }, + { + "epoch": 0.1, + "learning_rate": 3.4596090641757486e-05, + "loss": 1.1043, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 3.632589517384536e-05, + "loss": 0.8634, + "step": 210 + }, + { + "epoch": 0.11, + "learning_rate": 3.805569970593323e-05, + "loss": 0.86, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 3.97855042380211e-05, + "loss": 0.8325, + "step": 230 + }, + { + "epoch": 0.12, + "learning_rate": 4.151530877010898e-05, + "loss": 1.1083, + "step": 240 + }, + { + "epoch": 0.13, + "learning_rate": 4.324511330219685e-05, + "loss": 1.0759, + "step": 250 + }, + { + "epoch": 0.13, + "learning_rate": 4.497491783428473e-05, + "loss": 0.9178, + "step": 260 + }, + { + "epoch": 0.14, + "learning_rate": 4.67047223663726e-05, + "loss": 1.1132, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.8434526898460474e-05, + "loss": 1.0484, + "step": 280 + }, + { + "epoch": 0.15, + "learning_rate": 5.0164331430548346e-05, + "loss": 0.8629, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 5.1894135962636225e-05, + "loss": 0.6426, + "step": 300 + }, + { + "epoch": 0.16, + "learning_rate": 5.36239404947241e-05, + "loss": 1.0117, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 5.5353745026811976e-05, + "loss": 0.9769, + "step": 320 + }, + { + "epoch": 0.17, + "learning_rate": 5.708354955889985e-05, + "loss": 0.9459, + "step": 330 + }, + { + "epoch": 0.18, + "learning_rate": 5.881335409098772e-05, + "loss": 1.3208, + "step": 340 + }, + { + "epoch": 0.18, + "learning_rate": 6.054315862307559e-05, + "loss": 0.9631, + "step": 350 + }, + { + "epoch": 0.19, + "learning_rate": 6.227296315516347e-05, + "loss": 1.3533, + "step": 360 + }, + { + "epoch": 0.19, + "learning_rate": 6.400276768725135e-05, + "loss": 1.2228, + "step": 370 + }, + { + "epoch": 0.2, + "learning_rate": 6.573257221933921e-05, + "loss": 0.9195, + "step": 380 + }, + { + "epoch": 0.2, + "learning_rate": 6.746237675142709e-05, + "loss": 1.0466, + "step": 390 + }, + { + "epoch": 0.21, + "learning_rate": 6.919218128351497e-05, + "loss": 0.6537, + "step": 400 + }, + { + "epoch": 0.21, + "learning_rate": 7.092198581560285e-05, + "loss": 1.0766, + "step": 410 + }, + { + "epoch": 0.22, + "learning_rate": 7.265179034769071e-05, + "loss": 1.3196, + "step": 420 + }, + { + "epoch": 0.22, + "learning_rate": 7.438159487977858e-05, + "loss": 1.1902, + "step": 430 + }, + { + "epoch": 0.23, + "learning_rate": 7.611139941186646e-05, + "loss": 0.8251, + "step": 440 + }, + { + "epoch": 0.23, + "learning_rate": 7.784120394395434e-05, + "loss": 0.6196, + "step": 450 + }, + { + "epoch": 0.24, + "learning_rate": 7.95710084760422e-05, + "loss": 1.1941, + "step": 460 + }, + { + "epoch": 0.24, + "learning_rate": 8.130081300813008e-05, + "loss": 0.9509, + "step": 470 + }, + { + "epoch": 0.25, + "learning_rate": 8.303061754021796e-05, + "loss": 1.0669, + "step": 480 + }, + { + "epoch": 0.25, + "learning_rate": 8.476042207230583e-05, + "loss": 0.8493, + "step": 490 + }, + { + "epoch": 0.26, + "learning_rate": 8.64902266043937e-05, + "loss": 1.0381, + "step": 500 + }, + { + "epoch": 0.26, + "learning_rate": 8.822003113648158e-05, + "loss": 0.6853, + "step": 510 + }, + { + "epoch": 0.27, + "learning_rate": 8.994983566856946e-05, + "loss": 1.0025, + "step": 520 + }, + { + "epoch": 0.28, + "learning_rate": 9.167964020065733e-05, + "loss": 0.9758, + "step": 530 + }, + { + "epoch": 0.28, + "learning_rate": 9.34094447327452e-05, + "loss": 1.2263, + "step": 540 + }, + { + "epoch": 0.29, + "learning_rate": 9.513924926483307e-05, + "loss": 1.0245, + "step": 550 + }, + { + "epoch": 0.29, + "learning_rate": 9.686905379692095e-05, + "loss": 0.9136, + "step": 560 + }, + { + "epoch": 0.3, + "learning_rate": 9.859885832900881e-05, + "loss": 1.0586, + "step": 570 + }, + { + "epoch": 0.3, + "learning_rate": 0.00010032866286109669, + "loss": 1.029, + "step": 580 + }, + { + "epoch": 0.31, + "learning_rate": 0.00010205846739318457, + "loss": 0.9973, + "step": 590 + }, + { + "epoch": 0.31, + "learning_rate": 0.00010378827192527245, + "loss": 0.7652, + "step": 600 + }, + { + "epoch": 0.32, + "learning_rate": 0.00010551807645736032, + "loss": 1.2198, + "step": 610 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001072478809894482, + "loss": 1.3727, + "step": 620 + }, + { + "epoch": 0.33, + "learning_rate": 0.00010897768552153607, + "loss": 0.9157, + "step": 630 + }, + { + "epoch": 0.33, + "learning_rate": 0.00011070749005362395, + "loss": 1.4063, + "step": 640 + }, + { + "epoch": 0.34, + "learning_rate": 0.00011243729458571182, + "loss": 0.9697, + "step": 650 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001141670991177997, + "loss": 1.0678, + "step": 660 + }, + { + "epoch": 0.35, + "learning_rate": 0.00011589690364988756, + "loss": 1.1373, + "step": 670 + }, + { + "epoch": 0.35, + "learning_rate": 0.00011762670818197544, + "loss": 1.1127, + "step": 680 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001193565127140633, + "loss": 0.8828, + "step": 690 + }, + { + "epoch": 0.36, + "learning_rate": 0.00012108631724615118, + "loss": 1.0497, + "step": 700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012281612177823906, + "loss": 1.0252, + "step": 710 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012454592631032694, + "loss": 1.0175, + "step": 720 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012627573084241482, + "loss": 1.1303, + "step": 730 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001280055353745027, + "loss": 0.8751, + "step": 740 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012973533990659055, + "loss": 0.9646, + "step": 750 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013146514443867843, + "loss": 1.0412, + "step": 760 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001331949489707663, + "loss": 1.3243, + "step": 770 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013492475350285418, + "loss": 1.0182, + "step": 780 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013665455803494206, + "loss": 1.3814, + "step": 790 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013838436256702994, + "loss": 0.9131, + "step": 800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014011416709911782, + "loss": 1.0305, + "step": 810 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001418439716312057, + "loss": 0.9296, + "step": 820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014357377616329355, + "loss": 1.1304, + "step": 830 + }, + { + "epoch": 0.44, + "learning_rate": 0.00014530358069538143, + "loss": 0.964, + "step": 840 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001470333852274693, + "loss": 0.9223, + "step": 850 + }, + { + "epoch": 0.45, + "learning_rate": 0.00014876318975955716, + "loss": 0.8419, + "step": 860 + }, + { + "epoch": 0.45, + "learning_rate": 0.00015049299429164504, + "loss": 0.7896, + "step": 870 + }, + { + "epoch": 0.46, + "learning_rate": 0.00015222279882373292, + "loss": 1.2753, + "step": 880 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001539526033558208, + "loss": 1.01, + "step": 890 + }, + { + "epoch": 0.47, + "learning_rate": 0.00015568240788790867, + "loss": 0.8729, + "step": 900 + }, + { + "epoch": 0.47, + "learning_rate": 0.00015741221241999653, + "loss": 1.2337, + "step": 910 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001591420169520844, + "loss": 1.0598, + "step": 920 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016087182148417228, + "loss": 0.9216, + "step": 930 + }, + { + "epoch": 0.49, + "learning_rate": 0.00016260162601626016, + "loss": 1.2614, + "step": 940 + }, + { + "epoch": 0.49, + "learning_rate": 0.00016433143054834804, + "loss": 0.9602, + "step": 950 + }, + { + "epoch": 0.5, + "learning_rate": 0.00016606123508043592, + "loss": 1.1126, + "step": 960 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001677910396125238, + "loss": 0.7103, + "step": 970 + }, + { + "epoch": 0.51, + "learning_rate": 0.00016952084414461165, + "loss": 1.0462, + "step": 980 + }, + { + "epoch": 0.51, + "learning_rate": 0.00017125064867669953, + "loss": 0.9843, + "step": 990 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001729804532087874, + "loss": 1.2419, + "step": 1000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00017471025774087529, + "loss": 1.3823, + "step": 1010 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017644006227296316, + "loss": 0.9726, + "step": 1020 + }, + { + "epoch": 0.53, + "learning_rate": 0.00017816986680505104, + "loss": 1.1238, + "step": 1030 + }, + { + "epoch": 0.54, + "learning_rate": 0.00017989967133713892, + "loss": 0.934, + "step": 1040 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001816294758692268, + "loss": 1.5093, + "step": 1050 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018335928040131465, + "loss": 1.0234, + "step": 1060 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018508908493340253, + "loss": 0.7998, + "step": 1070 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001868188894654904, + "loss": 1.0191, + "step": 1080 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001885486939975783, + "loss": 1.1171, + "step": 1090 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019027849852966614, + "loss": 1.1417, + "step": 1100 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019200830306175402, + "loss": 0.9539, + "step": 1110 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001937381075938419, + "loss": 1.0955, + "step": 1120 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019546791212592978, + "loss": 1.3748, + "step": 1130 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019719771665801763, + "loss": 1.1126, + "step": 1140 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001989275211901055, + "loss": 1.0674, + "step": 1150 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020065732572219339, + "loss": 1.1236, + "step": 1160 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020238713025428126, + "loss": 1.0824, + "step": 1170 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020411693478636914, + "loss": 1.1181, + "step": 1180 + }, + { + "epoch": 0.62, + "learning_rate": 0.00020584673931845702, + "loss": 1.0881, + "step": 1190 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002075765438505449, + "loss": 1.1874, + "step": 1200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00020930634838263278, + "loss": 1.0377, + "step": 1210 + }, + { + "epoch": 0.63, + "learning_rate": 0.00021103615291472063, + "loss": 0.8027, + "step": 1220 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002127659574468085, + "loss": 0.8027, + "step": 1230 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002144957619788964, + "loss": 1.2303, + "step": 1240 + }, + { + "epoch": 0.65, + "learning_rate": 0.00021622556651098427, + "loss": 1.2914, + "step": 1250 + }, + { + "epoch": 0.65, + "learning_rate": 0.00021795537104307214, + "loss": 1.2018, + "step": 1260 + }, + { + "epoch": 0.66, + "learning_rate": 0.00021968517557516002, + "loss": 0.994, + "step": 1270 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002214149801072479, + "loss": 1.1917, + "step": 1280 + }, + { + "epoch": 0.67, + "learning_rate": 0.00022314478463933578, + "loss": 0.9846, + "step": 1290 + }, + { + "epoch": 0.67, + "learning_rate": 0.00022487458917142363, + "loss": 1.1561, + "step": 1300 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002266043937035115, + "loss": 1.3345, + "step": 1310 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002283341982355994, + "loss": 1.1663, + "step": 1320 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023006400276768727, + "loss": 1.0486, + "step": 1330 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023179380729977512, + "loss": 1.2478, + "step": 1340 + }, + { + "epoch": 0.7, + "learning_rate": 0.000233523611831863, + "loss": 1.1334, + "step": 1350 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023525341636395088, + "loss": 1.1248, + "step": 1360 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023698322089603876, + "loss": 1.2183, + "step": 1370 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002387130254281266, + "loss": 1.4618, + "step": 1380 + }, + { + "epoch": 0.72, + "learning_rate": 0.00024044282996021449, + "loss": 1.5796, + "step": 1390 + }, + { + "epoch": 0.73, + "learning_rate": 0.00024217263449230237, + "loss": 1.4877, + "step": 1400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00024390243902439024, + "loss": 0.9612, + "step": 1410 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002456322435564781, + "loss": 1.3634, + "step": 1420 + }, + { + "epoch": 0.74, + "learning_rate": 0.000247362048088566, + "loss": 1.1377, + "step": 1430 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002490918526206539, + "loss": 1.1365, + "step": 1440 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025082165715274176, + "loss": 1.2433, + "step": 1450 + }, + { + "epoch": 0.76, + "learning_rate": 0.00025255146168482964, + "loss": 1.2164, + "step": 1460 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002542812662169175, + "loss": 1.0668, + "step": 1470 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002560110707490054, + "loss": 1.174, + "step": 1480 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002577408752810932, + "loss": 1.1307, + "step": 1490 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002594706798131811, + "loss": 1.2406, + "step": 1500 + }, + { + "epoch": 0.78, + "learning_rate": 0.000261200484345269, + "loss": 1.2542, + "step": 1510 + }, + { + "epoch": 0.79, + "learning_rate": 0.00026293028887735685, + "loss": 0.9126, + "step": 1520 + }, + { + "epoch": 0.79, + "learning_rate": 0.00026466009340944473, + "loss": 1.1105, + "step": 1530 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002663898979415326, + "loss": 0.8491, + "step": 1540 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002681197024736205, + "loss": 1.4498, + "step": 1550 + }, + { + "epoch": 0.81, + "learning_rate": 0.00026984950700570837, + "loss": 1.3299, + "step": 1560 + }, + { + "epoch": 0.81, + "learning_rate": 0.00027157931153779625, + "loss": 1.2222, + "step": 1570 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002733091160698841, + "loss": 0.9812, + "step": 1580 + }, + { + "epoch": 0.83, + "learning_rate": 0.000275038920601972, + "loss": 1.2824, + "step": 1590 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002767687251340599, + "loss": 1.1105, + "step": 1600 + }, + { + "epoch": 0.84, + "learning_rate": 0.00027849852966614776, + "loss": 1.0978, + "step": 1610 + }, + { + "epoch": 0.84, + "learning_rate": 0.00028022833419823564, + "loss": 1.1159, + "step": 1620 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002819581387303235, + "loss": 1.1441, + "step": 1630 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002836879432624114, + "loss": 1.2566, + "step": 1640 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002854177477944992, + "loss": 1.1327, + "step": 1650 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002871475523265871, + "loss": 1.232, + "step": 1660 + }, + { + "epoch": 0.87, + "learning_rate": 0.000288877356858675, + "loss": 1.0974, + "step": 1670 + }, + { + "epoch": 0.87, + "learning_rate": 0.00029060716139076286, + "loss": 1.0083, + "step": 1680 + }, + { + "epoch": 0.88, + "learning_rate": 0.00029233696592285074, + "loss": 1.5513, + "step": 1690 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002940667704549386, + "loss": 1.3731, + "step": 1700 + }, + { + "epoch": 0.89, + "learning_rate": 0.00029579657498702644, + "loss": 1.1877, + "step": 1710 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002975263795191143, + "loss": 1.199, + "step": 1720 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002992561840512022, + "loss": 1.4768, + "step": 1730 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003009859885832901, + "loss": 1.7352, + "step": 1740 + }, + { + "epoch": 0.91, + "learning_rate": 0.00030271579311537796, + "loss": 1.3652, + "step": 1750 + }, + { + "epoch": 0.91, + "learning_rate": 0.00030444559764746583, + "loss": 1.1335, + "step": 1760 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003061754021795537, + "loss": 1.2259, + "step": 1770 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003079052067116416, + "loss": 1.3024, + "step": 1780 + }, + { + "epoch": 0.93, + "learning_rate": 0.00030963501124372947, + "loss": 1.4433, + "step": 1790 + }, + { + "epoch": 0.93, + "learning_rate": 0.00031136481577581735, + "loss": 1.1668, + "step": 1800 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003130946203079052, + "loss": 1.3298, + "step": 1810 + }, + { + "epoch": 0.94, + "learning_rate": 0.00031482442483999305, + "loss": 1.2601, + "step": 1820 + }, + { + "epoch": 0.95, + "learning_rate": 0.00031655422937208093, + "loss": 1.1204, + "step": 1830 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003182840339041688, + "loss": 1.2142, + "step": 1840 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003200138384362567, + "loss": 0.93, + "step": 1850 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032174364296834457, + "loss": 1.4335, + "step": 1860 + }, + { + "epoch": 0.97, + "learning_rate": 0.00032347344750043245, + "loss": 1.3081, + "step": 1870 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003252032520325203, + "loss": 1.2692, + "step": 1880 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003269330565646082, + "loss": 1.2172, + "step": 1890 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003286628610966961, + "loss": 1.2254, + "step": 1900 + }, + { + "epoch": 0.99, + "learning_rate": 0.00033039266562878396, + "loss": 1.1448, + "step": 1910 + }, + { + "epoch": 1.0, + "learning_rate": 0.00033212247016087184, + "loss": 1.2473, + "step": 1920 + }, + { + "epoch": 1.0, + "eval_accuracy": { + "accuracy": 0.5909208819714656 + }, + "eval_f1": { + "f1": 0.5910435702573883 + }, + "eval_loss": 0.9808588624000549, + "eval_precision": { + "precision": 0.6661654194259725 + }, + "eval_recall": { + "recall": 0.6016064778916844 + }, + "eval_runtime": 243.6104, + "eval_samples_per_second": 15.824, + "eval_steps_per_second": 7.914, + "step": 1927 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003338522746929597, + "loss": 1.1687, + "step": 1930 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003355820792250476, + "loss": 1.4294, + "step": 1940 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003373118837571355, + "loss": 1.2866, + "step": 1950 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003390416882892233, + "loss": 1.0858, + "step": 1960 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003407714928213112, + "loss": 1.1465, + "step": 1970 + }, + { + "epoch": 1.03, + "learning_rate": 0.00034250129735339906, + "loss": 1.2876, + "step": 1980 + }, + { + "epoch": 1.03, + "learning_rate": 0.00034423110188548694, + "loss": 1.0266, + "step": 1990 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003459609064175748, + "loss": 1.1494, + "step": 2000 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003476907109496627, + "loss": 1.5705, + "step": 2010 + }, + { + "epoch": 1.05, + "learning_rate": 0.00034942051548175057, + "loss": 0.9916, + "step": 2020 + }, + { + "epoch": 1.05, + "learning_rate": 0.00035115032001383845, + "loss": 1.2183, + "step": 2030 + }, + { + "epoch": 1.06, + "learning_rate": 0.00035288012454592633, + "loss": 1.1186, + "step": 2040 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003546099290780142, + "loss": 1.1386, + "step": 2050 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003563397336101021, + "loss": 1.413, + "step": 2060 + }, + { + "epoch": 1.07, + "learning_rate": 0.00035806953814218997, + "loss": 1.4434, + "step": 2070 + }, + { + "epoch": 1.08, + "learning_rate": 0.00035979934267427784, + "loss": 1.3144, + "step": 2080 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003615291472063657, + "loss": 1.4108, + "step": 2090 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003632589517384536, + "loss": 1.2305, + "step": 2100 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003649887562705415, + "loss": 1.2335, + "step": 2110 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003667185608026293, + "loss": 1.3617, + "step": 2120 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003684483653347172, + "loss": 1.1457, + "step": 2130 + }, + { + "epoch": 1.11, + "learning_rate": 0.00037017816986680506, + "loss": 1.0253, + "step": 2140 + }, + { + "epoch": 1.12, + "learning_rate": 0.00037190797439889294, + "loss": 1.1486, + "step": 2150 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003736377789309808, + "loss": 1.2603, + "step": 2160 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003753675834630687, + "loss": 1.25, + "step": 2170 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003770973879951566, + "loss": 1.5531, + "step": 2180 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003788271925272444, + "loss": 1.3442, + "step": 2190 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003805569970593323, + "loss": 1.1317, + "step": 2200 + }, + { + "epoch": 1.15, + "learning_rate": 0.00038228680159142016, + "loss": 1.3271, + "step": 2210 + }, + { + "epoch": 1.15, + "learning_rate": 0.00038401660612350804, + "loss": 1.3165, + "step": 2220 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003857464106555959, + "loss": 1.2377, + "step": 2230 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003874762151876838, + "loss": 1.2677, + "step": 2240 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003892060197197717, + "loss": 1.2756, + "step": 2250 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039093582425185955, + "loss": 1.0193, + "step": 2260 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039266562878394743, + "loss": 1.239, + "step": 2270 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039439543331603526, + "loss": 1.6044, + "step": 2280 + }, + { + "epoch": 1.19, + "learning_rate": 0.00039612523784812313, + "loss": 1.2396, + "step": 2290 + }, + { + "epoch": 1.19, + "learning_rate": 0.000397855042380211, + "loss": 1.0345, + "step": 2300 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003995848469122989, + "loss": 1.3125, + "step": 2310 + }, + { + "epoch": 1.2, + "learning_rate": 0.00040131465144438677, + "loss": 1.4382, + "step": 2320 + }, + { + "epoch": 1.21, + "learning_rate": 0.00040304445597647465, + "loss": 1.1232, + "step": 2330 + }, + { + "epoch": 1.21, + "learning_rate": 0.00040477426050856253, + "loss": 1.3415, + "step": 2340 + }, + { + "epoch": 1.22, + "learning_rate": 0.0004065040650406504, + "loss": 1.3517, + "step": 2350 + }, + { + "epoch": 1.22, + "learning_rate": 0.0004082338695727383, + "loss": 1.1871, + "step": 2360 + }, + { + "epoch": 1.23, + "learning_rate": 0.00040996367410482616, + "loss": 1.541, + "step": 2370 + }, + { + "epoch": 1.24, + "learning_rate": 0.00041169347863691404, + "loss": 1.0871, + "step": 2380 + }, + { + "epoch": 1.24, + "learning_rate": 0.0004134232831690019, + "loss": 1.0727, + "step": 2390 + }, + { + "epoch": 1.25, + "learning_rate": 0.0004151530877010898, + "loss": 1.2664, + "step": 2400 + }, + { + "epoch": 1.25, + "learning_rate": 0.0004168828922331777, + "loss": 0.9518, + "step": 2410 + }, + { + "epoch": 1.26, + "learning_rate": 0.00041861269676526556, + "loss": 1.5851, + "step": 2420 + }, + { + "epoch": 1.26, + "learning_rate": 0.0004203425012973534, + "loss": 1.3433, + "step": 2430 + }, + { + "epoch": 1.27, + "learning_rate": 0.00042207230582944126, + "loss": 1.1357, + "step": 2440 + }, + { + "epoch": 1.27, + "learning_rate": 0.00042380211036152914, + "loss": 1.3124, + "step": 2450 + }, + { + "epoch": 1.28, + "learning_rate": 0.000425531914893617, + "loss": 1.3198, + "step": 2460 + }, + { + "epoch": 1.28, + "learning_rate": 0.0004272617194257049, + "loss": 1.31, + "step": 2470 + }, + { + "epoch": 1.29, + "learning_rate": 0.0004289915239577928, + "loss": 1.2563, + "step": 2480 + }, + { + "epoch": 1.29, + "learning_rate": 0.00043072132848988065, + "loss": 1.2553, + "step": 2490 + }, + { + "epoch": 1.3, + "learning_rate": 0.00043245113302196853, + "loss": 1.3314, + "step": 2500 + }, + { + "epoch": 1.3, + "learning_rate": 0.0004341809375540564, + "loss": 1.8077, + "step": 2510 + }, + { + "epoch": 1.31, + "learning_rate": 0.0004359107420861443, + "loss": 1.7739, + "step": 2520 + }, + { + "epoch": 1.31, + "learning_rate": 0.00043764054661823217, + "loss": 1.3835, + "step": 2530 + }, + { + "epoch": 1.32, + "learning_rate": 0.00043937035115032005, + "loss": 1.3689, + "step": 2540 + }, + { + "epoch": 1.32, + "learning_rate": 0.0004411001556824079, + "loss": 1.4259, + "step": 2550 + }, + { + "epoch": 1.33, + "learning_rate": 0.0004428299602144958, + "loss": 1.0997, + "step": 2560 + }, + { + "epoch": 1.33, + "learning_rate": 0.0004445597647465837, + "loss": 0.944, + "step": 2570 + }, + { + "epoch": 1.34, + "learning_rate": 0.00044628956927867156, + "loss": 1.431, + "step": 2580 + }, + { + "epoch": 1.34, + "learning_rate": 0.0004480193738107594, + "loss": 1.492, + "step": 2590 + }, + { + "epoch": 1.35, + "learning_rate": 0.00044974917834284726, + "loss": 1.0205, + "step": 2600 + }, + { + "epoch": 1.35, + "learning_rate": 0.00045147898287493514, + "loss": 1.2334, + "step": 2610 + }, + { + "epoch": 1.36, + "learning_rate": 0.000453208787407023, + "loss": 1.0434, + "step": 2620 + }, + { + "epoch": 1.36, + "learning_rate": 0.0004549385919391109, + "loss": 1.4912, + "step": 2630 + }, + { + "epoch": 1.37, + "learning_rate": 0.0004566683964711988, + "loss": 1.2939, + "step": 2640 + }, + { + "epoch": 1.38, + "learning_rate": 0.00045839820100328666, + "loss": 1.0389, + "step": 2650 + }, + { + "epoch": 1.38, + "learning_rate": 0.00046012800553537454, + "loss": 1.1925, + "step": 2660 + }, + { + "epoch": 1.39, + "learning_rate": 0.0004618578100674624, + "loss": 1.5455, + "step": 2670 + }, + { + "epoch": 1.39, + "learning_rate": 0.00046358761459955024, + "loss": 1.2916, + "step": 2680 + }, + { + "epoch": 1.4, + "learning_rate": 0.0004653174191316381, + "loss": 1.253, + "step": 2690 + }, + { + "epoch": 1.4, + "learning_rate": 0.000467047223663726, + "loss": 1.377, + "step": 2700 + }, + { + "epoch": 1.41, + "learning_rate": 0.0004687770281958139, + "loss": 1.4184, + "step": 2710 + }, + { + "epoch": 1.41, + "learning_rate": 0.00047050683272790175, + "loss": 1.3838, + "step": 2720 + }, + { + "epoch": 1.42, + "learning_rate": 0.00047223663725998963, + "loss": 1.1912, + "step": 2730 + }, + { + "epoch": 1.42, + "learning_rate": 0.0004739664417920775, + "loss": 1.0304, + "step": 2740 + }, + { + "epoch": 1.43, + "learning_rate": 0.00047569624632416534, + "loss": 1.3583, + "step": 2750 + }, + { + "epoch": 1.43, + "learning_rate": 0.0004774260508562532, + "loss": 1.3856, + "step": 2760 + }, + { + "epoch": 1.44, + "learning_rate": 0.0004791558553883411, + "loss": 1.5254, + "step": 2770 + }, + { + "epoch": 1.44, + "learning_rate": 0.00048088565992042897, + "loss": 1.2068, + "step": 2780 + }, + { + "epoch": 1.45, + "learning_rate": 0.00048261546445251685, + "loss": 1.3795, + "step": 2790 + }, + { + "epoch": 1.45, + "learning_rate": 0.00048434526898460473, + "loss": 1.5103, + "step": 2800 + }, + { + "epoch": 1.46, + "learning_rate": 0.0004860750735166926, + "loss": 1.6531, + "step": 2810 + }, + { + "epoch": 1.46, + "learning_rate": 0.0004878048780487805, + "loss": 1.3675, + "step": 2820 + }, + { + "epoch": 1.47, + "learning_rate": 0.0004895346825808684, + "loss": 1.6244, + "step": 2830 + }, + { + "epoch": 1.47, + "learning_rate": 0.0004912644871129562, + "loss": 1.8395, + "step": 2840 + }, + { + "epoch": 1.48, + "learning_rate": 0.0004929942916450441, + "loss": 1.6552, + "step": 2850 + }, + { + "epoch": 1.48, + "learning_rate": 0.000494724096177132, + "loss": 1.6432, + "step": 2860 + }, + { + "epoch": 1.49, + "learning_rate": 0.0004964539007092199, + "loss": 1.4839, + "step": 2870 + }, + { + "epoch": 1.49, + "learning_rate": 0.0004981837052413078, + "loss": 1.403, + "step": 2880 + }, + { + "epoch": 1.5, + "learning_rate": 0.0004999135097733956, + "loss": 1.8624, + "step": 2890 + }, + { + "epoch": 1.5, + "learning_rate": 0.0005016433143054835, + "loss": 1.4894, + "step": 2900 + }, + { + "epoch": 1.51, + "learning_rate": 0.0005033731188375714, + "loss": 2.1144, + "step": 2910 + }, + { + "epoch": 1.52, + "learning_rate": 0.0005051029233696593, + "loss": 1.7605, + "step": 2920 + }, + { + "epoch": 1.52, + "learning_rate": 0.000506832727901747, + "loss": 1.6352, + "step": 2930 + }, + { + "epoch": 1.53, + "learning_rate": 0.000508562532433835, + "loss": 1.6826, + "step": 2940 + }, + { + "epoch": 1.53, + "learning_rate": 0.0005102923369659228, + "loss": 1.8023, + "step": 2950 + }, + { + "epoch": 1.54, + "learning_rate": 0.0005120221414980108, + "loss": 1.6957, + "step": 2960 + }, + { + "epoch": 1.54, + "learning_rate": 0.0005137519460300986, + "loss": 2.1667, + "step": 2970 + }, + { + "epoch": 1.55, + "learning_rate": 0.0005154817505621864, + "loss": 2.1662, + "step": 2980 + }, + { + "epoch": 1.55, + "learning_rate": 0.0005172115550942743, + "loss": 1.8724, + "step": 2990 + }, + { + "epoch": 1.56, + "learning_rate": 0.0005189413596263622, + "loss": 1.7312, + "step": 3000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0005206711641584501, + "loss": 2.0953, + "step": 3010 + }, + { + "epoch": 1.57, + "learning_rate": 0.000522400968690538, + "loss": 1.8218, + "step": 3020 + }, + { + "epoch": 1.57, + "learning_rate": 0.0005241307732226258, + "loss": 1.9732, + "step": 3030 + }, + { + "epoch": 1.58, + "learning_rate": 0.0005258605777547137, + "loss": 2.229, + "step": 3040 + }, + { + "epoch": 1.58, + "learning_rate": 0.0005275903822868016, + "loss": 1.9946, + "step": 3050 + }, + { + "epoch": 1.59, + "learning_rate": 0.0005293201868188895, + "loss": 2.162, + "step": 3060 + }, + { + "epoch": 1.59, + "learning_rate": 0.0005310499913509773, + "loss": 2.0647, + "step": 3070 + }, + { + "epoch": 1.6, + "learning_rate": 0.0005327797958830652, + "loss": 2.0291, + "step": 3080 + }, + { + "epoch": 1.6, + "learning_rate": 0.0005345096004151531, + "loss": 2.0622, + "step": 3090 + }, + { + "epoch": 1.61, + "learning_rate": 0.000536239404947241, + "loss": 1.9414, + "step": 3100 + }, + { + "epoch": 1.61, + "learning_rate": 0.0005379692094793289, + "loss": 2.0906, + "step": 3110 + }, + { + "epoch": 1.62, + "learning_rate": 0.0005396990140114167, + "loss": 2.0018, + "step": 3120 + }, + { + "epoch": 1.62, + "learning_rate": 0.0005414288185435046, + "loss": 1.9754, + "step": 3130 + }, + { + "epoch": 1.63, + "learning_rate": 0.0005431586230755925, + "loss": 1.949, + "step": 3140 + }, + { + "epoch": 1.63, + "learning_rate": 0.0005448884276076803, + "loss": 1.918, + "step": 3150 + }, + { + "epoch": 1.64, + "learning_rate": 0.0005466182321397683, + "loss": 2.1205, + "step": 3160 + }, + { + "epoch": 1.65, + "learning_rate": 0.000548348036671856, + "loss": 1.8933, + "step": 3170 + }, + { + "epoch": 1.65, + "learning_rate": 0.000550077841203944, + "loss": 1.9666, + "step": 3180 + }, + { + "epoch": 1.66, + "learning_rate": 0.0005518076457360318, + "loss": 1.9286, + "step": 3190 + }, + { + "epoch": 1.66, + "learning_rate": 0.0005535374502681198, + "loss": 1.9417, + "step": 3200 + }, + { + "epoch": 1.67, + "learning_rate": 0.0005552672548002075, + "loss": 1.8296, + "step": 3210 + }, + { + "epoch": 1.67, + "learning_rate": 0.0005569970593322955, + "loss": 1.9005, + "step": 3220 + }, + { + "epoch": 1.68, + "learning_rate": 0.0005587268638643833, + "loss": 1.954, + "step": 3230 + }, + { + "epoch": 1.68, + "learning_rate": 0.0005604566683964713, + "loss": 1.9252, + "step": 3240 + }, + { + "epoch": 1.69, + "learning_rate": 0.000562186472928559, + "loss": 1.9827, + "step": 3250 + }, + { + "epoch": 1.69, + "learning_rate": 0.000563916277460647, + "loss": 1.7931, + "step": 3260 + }, + { + "epoch": 1.7, + "learning_rate": 0.0005656460819927348, + "loss": 1.8842, + "step": 3270 + }, + { + "epoch": 1.7, + "learning_rate": 0.0005673758865248228, + "loss": 1.8838, + "step": 3280 + }, + { + "epoch": 1.71, + "learning_rate": 0.0005691056910569106, + "loss": 1.7902, + "step": 3290 + }, + { + "epoch": 1.71, + "learning_rate": 0.0005708354955889984, + "loss": 1.9054, + "step": 3300 + }, + { + "epoch": 1.72, + "learning_rate": 0.0005725653001210863, + "loss": 1.972, + "step": 3310 + }, + { + "epoch": 1.72, + "learning_rate": 0.0005742951046531742, + "loss": 1.999, + "step": 3320 + }, + { + "epoch": 1.73, + "learning_rate": 0.0005760249091852621, + "loss": 1.9566, + "step": 3330 + }, + { + "epoch": 1.73, + "learning_rate": 0.00057775471371735, + "loss": 1.8273, + "step": 3340 + }, + { + "epoch": 1.74, + "learning_rate": 0.0005794845182494378, + "loss": 1.9554, + "step": 3350 + }, + { + "epoch": 1.74, + "learning_rate": 0.0005812143227815257, + "loss": 2.0318, + "step": 3360 + }, + { + "epoch": 1.75, + "learning_rate": 0.0005829441273136136, + "loss": 1.9838, + "step": 3370 + }, + { + "epoch": 1.75, + "learning_rate": 0.0005846739318457015, + "loss": 1.9402, + "step": 3380 + }, + { + "epoch": 1.76, + "learning_rate": 0.0005864037363777894, + "loss": 1.9904, + "step": 3390 + }, + { + "epoch": 1.76, + "learning_rate": 0.0005881335409098772, + "loss": 1.9198, + "step": 3400 + }, + { + "epoch": 1.77, + "learning_rate": 0.0005898633454419651, + "loss": 1.9405, + "step": 3410 + }, + { + "epoch": 1.77, + "learning_rate": 0.0005915931499740529, + "loss": 1.8445, + "step": 3420 + }, + { + "epoch": 1.78, + "learning_rate": 0.0005933229545061409, + "loss": 1.9201, + "step": 3430 + }, + { + "epoch": 1.79, + "learning_rate": 0.0005950527590382286, + "loss": 1.8237, + "step": 3440 + }, + { + "epoch": 1.79, + "learning_rate": 0.0005967825635703165, + "loss": 1.7255, + "step": 3450 + }, + { + "epoch": 1.8, + "learning_rate": 0.0005985123681024044, + "loss": 1.844, + "step": 3460 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006002421726344923, + "loss": 2.0433, + "step": 3470 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006019719771665802, + "loss": 1.7753, + "step": 3480 + }, + { + "epoch": 1.81, + "learning_rate": 0.000603701781698668, + "loss": 1.858, + "step": 3490 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006054315862307559, + "loss": 1.9348, + "step": 3500 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006071613907628438, + "loss": 1.8196, + "step": 3510 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006088911952949317, + "loss": 2.0001, + "step": 3520 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006106209998270195, + "loss": 1.806, + "step": 3530 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006123508043591074, + "loss": 1.8369, + "step": 3540 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006140806088911953, + "loss": 1.8824, + "step": 3550 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006158104134232832, + "loss": 1.8376, + "step": 3560 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006175402179553711, + "loss": 1.7554, + "step": 3570 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006192700224874589, + "loss": 1.835, + "step": 3580 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006209998270195468, + "loss": 1.7473, + "step": 3590 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006227296315516347, + "loss": 1.8827, + "step": 3600 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006244594360837226, + "loss": 1.7536, + "step": 3610 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006261892406158103, + "loss": 1.8997, + "step": 3620 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006279190451478983, + "loss": 1.813, + "step": 3630 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006296488496799861, + "loss": 1.8474, + "step": 3640 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006313786542120741, + "loss": 1.8263, + "step": 3650 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006331084587441619, + "loss": 1.8817, + "step": 3660 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006348382632762499, + "loss": 1.9347, + "step": 3670 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006365680678083376, + "loss": 1.786, + "step": 3680 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006382978723404256, + "loss": 2.197, + "step": 3690 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006400276768725134, + "loss": 1.9695, + "step": 3700 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006417574814046014, + "loss": 1.8939, + "step": 3710 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006434872859366891, + "loss": 1.8662, + "step": 3720 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006452170904687771, + "loss": 1.8054, + "step": 3730 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006469468950008649, + "loss": 1.8351, + "step": 3740 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006486766995329529, + "loss": 1.9102, + "step": 3750 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006504065040650406, + "loss": 1.8462, + "step": 3760 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006521363085971285, + "loss": 1.894, + "step": 3770 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006538661131292164, + "loss": 1.7559, + "step": 3780 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006555959176613043, + "loss": 1.8255, + "step": 3790 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006573257221933922, + "loss": 1.7118, + "step": 3800 + }, + { + "epoch": 1.98, + "learning_rate": 0.00065905552672548, + "loss": 1.8048, + "step": 3810 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006607853312575679, + "loss": 1.7759, + "step": 3820 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006625151357896558, + "loss": 1.7974, + "step": 3830 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006642449403217437, + "loss": 1.7673, + "step": 3840 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006659747448538316, + "loss": 1.7959, + "step": 3850 + }, + { + "epoch": 2.0, + "eval_accuracy": { + "accuracy": 0.26848249027237353 + }, + "eval_f1": { + "f1": 0.17635435227079216 + }, + "eval_loss": 1.8579649925231934, + "eval_precision": { + "precision": 0.15600300783137744 + }, + "eval_recall": { + "recall": 0.2748312652287413 + }, + "eval_runtime": 115.151, + "eval_samples_per_second": 33.478, + "eval_steps_per_second": 16.743, + "step": 3854 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006677045493859194, + "loss": 1.9251, + "step": 3860 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006694343539180073, + "loss": 1.7743, + "step": 3870 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006711641584500952, + "loss": 1.8658, + "step": 3880 + }, + { + "epoch": 2.02, + "learning_rate": 0.000672893962982183, + "loss": 1.8682, + "step": 3890 + }, + { + "epoch": 2.02, + "learning_rate": 0.000674623767514271, + "loss": 1.9279, + "step": 3900 + }, + { + "epoch": 2.03, + "learning_rate": 0.0006763535720463587, + "loss": 2.0145, + "step": 3910 + }, + { + "epoch": 2.03, + "learning_rate": 0.0006780833765784466, + "loss": 2.0323, + "step": 3920 + }, + { + "epoch": 2.04, + "learning_rate": 0.0006798131811105345, + "loss": 1.9978, + "step": 3930 + }, + { + "epoch": 2.04, + "learning_rate": 0.0006815429856426224, + "loss": 1.9258, + "step": 3940 + }, + { + "epoch": 2.05, + "learning_rate": 0.0006832727901747102, + "loss": 1.9777, + "step": 3950 + }, + { + "epoch": 2.06, + "learning_rate": 0.0006850025947067981, + "loss": 1.9442, + "step": 3960 + }, + { + "epoch": 2.06, + "learning_rate": 0.000686732399238886, + "loss": 1.9772, + "step": 3970 + }, + { + "epoch": 2.07, + "learning_rate": 0.0006884622037709739, + "loss": 1.9368, + "step": 3980 + }, + { + "epoch": 2.07, + "learning_rate": 0.0006901920083030618, + "loss": 1.8948, + "step": 3990 + }, + { + "epoch": 2.08, + "learning_rate": 0.0006919218128351496, + "loss": 1.9485, + "step": 4000 + }, + { + "epoch": 2.08, + "learning_rate": 0.0006936516173672375, + "loss": 1.8472, + "step": 4010 + }, + { + "epoch": 2.09, + "learning_rate": 0.0006953814218993254, + "loss": 1.7604, + "step": 4020 + }, + { + "epoch": 2.09, + "learning_rate": 0.0006971112264314133, + "loss": 1.8371, + "step": 4030 + }, + { + "epoch": 2.1, + "learning_rate": 0.0006988410309635011, + "loss": 1.7566, + "step": 4040 + }, + { + "epoch": 2.1, + "learning_rate": 0.000700570835495589, + "loss": 1.9021, + "step": 4050 + }, + { + "epoch": 2.11, + "learning_rate": 0.0007023006400276769, + "loss": 1.744, + "step": 4060 + }, + { + "epoch": 2.11, + "learning_rate": 0.0007040304445597648, + "loss": 1.8639, + "step": 4070 + }, + { + "epoch": 2.12, + "learning_rate": 0.0007057602490918527, + "loss": 1.857, + "step": 4080 + }, + { + "epoch": 2.12, + "learning_rate": 0.0007074900536239404, + "loss": 1.8229, + "step": 4090 + }, + { + "epoch": 2.13, + "learning_rate": 0.0007092198581560284, + "loss": 1.8733, + "step": 4100 + }, + { + "epoch": 2.13, + "learning_rate": 0.0007109496626881162, + "loss": 1.8442, + "step": 4110 + }, + { + "epoch": 2.14, + "learning_rate": 0.0007126794672202042, + "loss": 1.8209, + "step": 4120 + }, + { + "epoch": 2.14, + "learning_rate": 0.0007144092717522919, + "loss": 1.7018, + "step": 4130 + }, + { + "epoch": 2.15, + "learning_rate": 0.0007161390762843799, + "loss": 1.8013, + "step": 4140 + }, + { + "epoch": 2.15, + "learning_rate": 0.0007178688808164677, + "loss": 1.9447, + "step": 4150 + }, + { + "epoch": 2.16, + "learning_rate": 0.0007195986853485557, + "loss": 1.8768, + "step": 4160 + }, + { + "epoch": 2.16, + "learning_rate": 0.0007213284898806435, + "loss": 1.822, + "step": 4170 + }, + { + "epoch": 2.17, + "learning_rate": 0.0007230582944127314, + "loss": 1.77, + "step": 4180 + }, + { + "epoch": 2.17, + "learning_rate": 0.0007247880989448192, + "loss": 1.8653, + "step": 4190 + }, + { + "epoch": 2.18, + "learning_rate": 0.0007265179034769072, + "loss": 1.7695, + "step": 4200 + }, + { + "epoch": 2.18, + "learning_rate": 0.000728247708008995, + "loss": 1.8426, + "step": 4210 + }, + { + "epoch": 2.19, + "learning_rate": 0.000729977512541083, + "loss": 1.8379, + "step": 4220 + }, + { + "epoch": 2.2, + "learning_rate": 0.0007317073170731707, + "loss": 1.8384, + "step": 4230 + }, + { + "epoch": 2.2, + "learning_rate": 0.0007334371216052586, + "loss": 1.9245, + "step": 4240 + }, + { + "epoch": 2.21, + "learning_rate": 0.0007351669261373465, + "loss": 1.8017, + "step": 4250 + }, + { + "epoch": 2.21, + "learning_rate": 0.0007368967306694344, + "loss": 1.8151, + "step": 4260 + }, + { + "epoch": 2.22, + "learning_rate": 0.0007386265352015222, + "loss": 1.8356, + "step": 4270 + }, + { + "epoch": 2.22, + "learning_rate": 0.0007403563397336101, + "loss": 1.9064, + "step": 4280 + }, + { + "epoch": 2.23, + "learning_rate": 0.000742086144265698, + "loss": 1.8325, + "step": 4290 + }, + { + "epoch": 2.23, + "learning_rate": 0.0007438159487977859, + "loss": 1.8079, + "step": 4300 + }, + { + "epoch": 2.24, + "learning_rate": 0.0007455457533298738, + "loss": 1.7581, + "step": 4310 + }, + { + "epoch": 2.24, + "learning_rate": 0.0007472755578619616, + "loss": 1.9346, + "step": 4320 + }, + { + "epoch": 2.25, + "learning_rate": 0.0007490053623940495, + "loss": 1.8706, + "step": 4330 + }, + { + "epoch": 2.25, + "learning_rate": 0.0007507351669261374, + "loss": 1.8314, + "step": 4340 + }, + { + "epoch": 2.26, + "learning_rate": 0.0007524649714582253, + "loss": 1.8881, + "step": 4350 + }, + { + "epoch": 2.26, + "learning_rate": 0.0007541947759903132, + "loss": 1.8785, + "step": 4360 + }, + { + "epoch": 2.27, + "learning_rate": 0.000755924580522401, + "loss": 1.7916, + "step": 4370 + }, + { + "epoch": 2.27, + "learning_rate": 0.0007576543850544888, + "loss": 1.8089, + "step": 4380 + }, + { + "epoch": 2.28, + "learning_rate": 0.0007593841895865767, + "loss": 1.7736, + "step": 4390 + }, + { + "epoch": 2.28, + "learning_rate": 0.0007611139941186646, + "loss": 1.8184, + "step": 4400 + }, + { + "epoch": 2.29, + "learning_rate": 0.0007628437986507524, + "loss": 1.8084, + "step": 4410 + }, + { + "epoch": 2.29, + "learning_rate": 0.0007645736031828403, + "loss": 2.0347, + "step": 4420 + }, + { + "epoch": 2.3, + "learning_rate": 0.0007663034077149282, + "loss": 1.7984, + "step": 4430 + }, + { + "epoch": 2.3, + "learning_rate": 0.0007680332122470161, + "loss": 1.7799, + "step": 4440 + }, + { + "epoch": 2.31, + "learning_rate": 0.000769763016779104, + "loss": 1.7603, + "step": 4450 + }, + { + "epoch": 2.31, + "learning_rate": 0.0007714928213111918, + "loss": 1.6663, + "step": 4460 + }, + { + "epoch": 2.32, + "learning_rate": 0.0007732226258432797, + "loss": 1.9232, + "step": 4470 + }, + { + "epoch": 2.32, + "learning_rate": 0.0007749524303753676, + "loss": 1.8086, + "step": 4480 + }, + { + "epoch": 2.33, + "learning_rate": 0.0007766822349074555, + "loss": 1.7616, + "step": 4490 + }, + { + "epoch": 2.34, + "learning_rate": 0.0007784120394395433, + "loss": 1.8018, + "step": 4500 + }, + { + "epoch": 2.34, + "learning_rate": 0.0007801418439716312, + "loss": 1.8413, + "step": 4510 + }, + { + "epoch": 2.35, + "learning_rate": 0.0007818716485037191, + "loss": 2.1126, + "step": 4520 + }, + { + "epoch": 2.35, + "learning_rate": 0.000783601453035807, + "loss": 1.9105, + "step": 4530 + }, + { + "epoch": 2.36, + "learning_rate": 0.0007853312575678949, + "loss": 1.8485, + "step": 4540 + }, + { + "epoch": 2.36, + "learning_rate": 0.0007870610620999827, + "loss": 1.8835, + "step": 4550 + }, + { + "epoch": 2.37, + "learning_rate": 0.0007887908666320705, + "loss": 1.7881, + "step": 4560 + }, + { + "epoch": 2.37, + "learning_rate": 0.0007905206711641585, + "loss": 1.7013, + "step": 4570 + }, + { + "epoch": 2.38, + "learning_rate": 0.0007922504756962463, + "loss": 1.7657, + "step": 4580 + }, + { + "epoch": 2.38, + "learning_rate": 0.0007939802802283343, + "loss": 1.9326, + "step": 4590 + }, + { + "epoch": 2.39, + "learning_rate": 0.000795710084760422, + "loss": 1.6888, + "step": 4600 + }, + { + "epoch": 2.39, + "learning_rate": 0.00079743988929251, + "loss": 1.8114, + "step": 4610 + }, + { + "epoch": 2.4, + "learning_rate": 0.0007991696938245978, + "loss": 1.9245, + "step": 4620 + }, + { + "epoch": 2.4, + "learning_rate": 0.0008008994983566858, + "loss": 1.8287, + "step": 4630 + }, + { + "epoch": 2.41, + "learning_rate": 0.0008026293028887735, + "loss": 1.8763, + "step": 4640 + }, + { + "epoch": 2.41, + "learning_rate": 0.0008043591074208615, + "loss": 1.6896, + "step": 4650 + }, + { + "epoch": 2.42, + "learning_rate": 0.0008060889119529493, + "loss": 1.9996, + "step": 4660 + }, + { + "epoch": 2.42, + "learning_rate": 0.0008078187164850373, + "loss": 1.7345, + "step": 4670 + }, + { + "epoch": 2.43, + "learning_rate": 0.0008095485210171251, + "loss": 1.7908, + "step": 4680 + }, + { + "epoch": 2.43, + "learning_rate": 0.000811278325549213, + "loss": 1.8212, + "step": 4690 + }, + { + "epoch": 2.44, + "learning_rate": 0.0008130081300813008, + "loss": 1.8434, + "step": 4700 + }, + { + "epoch": 2.44, + "learning_rate": 0.0008147379346133887, + "loss": 1.9086, + "step": 4710 + }, + { + "epoch": 2.45, + "learning_rate": 0.0008164677391454766, + "loss": 1.9098, + "step": 4720 + }, + { + "epoch": 2.45, + "learning_rate": 0.0008181975436775644, + "loss": 1.7695, + "step": 4730 + }, + { + "epoch": 2.46, + "learning_rate": 0.0008199273482096523, + "loss": 1.7415, + "step": 4740 + }, + { + "epoch": 2.46, + "learning_rate": 0.0008216571527417402, + "loss": 1.8291, + "step": 4750 + }, + { + "epoch": 2.47, + "learning_rate": 0.0008233869572738281, + "loss": 1.7842, + "step": 4760 + }, + { + "epoch": 2.48, + "learning_rate": 0.000825116761805916, + "loss": 1.8745, + "step": 4770 + }, + { + "epoch": 2.48, + "learning_rate": 0.0008268465663380038, + "loss": 1.7075, + "step": 4780 + }, + { + "epoch": 2.49, + "learning_rate": 0.0008285763708700917, + "loss": 1.8133, + "step": 4790 + }, + { + "epoch": 2.49, + "learning_rate": 0.0008303061754021796, + "loss": 1.7318, + "step": 4800 + }, + { + "epoch": 2.5, + "learning_rate": 0.0008320359799342675, + "loss": 1.874, + "step": 4810 + }, + { + "epoch": 2.5, + "learning_rate": 0.0008337657844663554, + "loss": 1.8895, + "step": 4820 + }, + { + "epoch": 2.51, + "learning_rate": 0.0008354955889984432, + "loss": 1.8485, + "step": 4830 + }, + { + "epoch": 2.51, + "learning_rate": 0.0008372253935305311, + "loss": 1.7643, + "step": 4840 + }, + { + "epoch": 2.52, + "learning_rate": 0.000838955198062619, + "loss": 1.8477, + "step": 4850 + }, + { + "epoch": 2.52, + "learning_rate": 0.0008406850025947068, + "loss": 1.8611, + "step": 4860 + }, + { + "epoch": 2.53, + "learning_rate": 0.0008424148071267946, + "loss": 1.8908, + "step": 4870 + }, + { + "epoch": 2.53, + "learning_rate": 0.0008441446116588825, + "loss": 1.8017, + "step": 4880 + }, + { + "epoch": 2.54, + "learning_rate": 0.0008458744161909704, + "loss": 1.9186, + "step": 4890 + }, + { + "epoch": 2.54, + "learning_rate": 0.0008476042207230583, + "loss": 1.8141, + "step": 4900 + }, + { + "epoch": 2.55, + "learning_rate": 0.0008493340252551462, + "loss": 1.8298, + "step": 4910 + }, + { + "epoch": 2.55, + "learning_rate": 0.000851063829787234, + "loss": 1.7711, + "step": 4920 + }, + { + "epoch": 2.56, + "learning_rate": 0.0008527936343193219, + "loss": 1.8265, + "step": 4930 + }, + { + "epoch": 2.56, + "learning_rate": 0.0008545234388514098, + "loss": 1.7649, + "step": 4940 + }, + { + "epoch": 2.57, + "learning_rate": 0.0008562532433834977, + "loss": 1.9159, + "step": 4950 + }, + { + "epoch": 2.57, + "learning_rate": 0.0008579830479155855, + "loss": 1.7968, + "step": 4960 + }, + { + "epoch": 2.58, + "learning_rate": 0.0008597128524476734, + "loss": 1.807, + "step": 4970 + }, + { + "epoch": 2.58, + "learning_rate": 0.0008614426569797613, + "loss": 1.7633, + "step": 4980 + }, + { + "epoch": 2.59, + "learning_rate": 0.0008631724615118492, + "loss": 1.8019, + "step": 4990 + }, + { + "epoch": 2.59, + "learning_rate": 0.0008649022660439371, + "loss": 1.8697, + "step": 5000 + }, + { + "epoch": 2.6, + "learning_rate": 0.0008666320705760249, + "loss": 1.8348, + "step": 5010 + }, + { + "epoch": 2.61, + "learning_rate": 0.0008683618751081128, + "loss": 1.805, + "step": 5020 + }, + { + "epoch": 2.61, + "learning_rate": 0.0008700916796402006, + "loss": 1.7346, + "step": 5030 + }, + { + "epoch": 2.62, + "learning_rate": 0.0008718214841722886, + "loss": 1.9219, + "step": 5040 + }, + { + "epoch": 2.62, + "learning_rate": 0.0008735512887043763, + "loss": 2.0954, + "step": 5050 + }, + { + "epoch": 2.63, + "learning_rate": 0.0008752810932364643, + "loss": 1.9262, + "step": 5060 + }, + { + "epoch": 2.63, + "learning_rate": 0.0008770108977685521, + "loss": 1.8491, + "step": 5070 + }, + { + "epoch": 2.64, + "learning_rate": 0.0008787407023006401, + "loss": 2.0113, + "step": 5080 + }, + { + "epoch": 2.64, + "learning_rate": 0.0008804705068327279, + "loss": 1.9341, + "step": 5090 + }, + { + "epoch": 2.65, + "learning_rate": 0.0008822003113648159, + "loss": 1.8968, + "step": 5100 + }, + { + "epoch": 2.65, + "learning_rate": 0.0008839301158969036, + "loss": 2.0009, + "step": 5110 + }, + { + "epoch": 2.66, + "learning_rate": 0.0008856599204289916, + "loss": 1.9956, + "step": 5120 + }, + { + "epoch": 2.66, + "learning_rate": 0.0008873897249610794, + "loss": 1.9083, + "step": 5130 + }, + { + "epoch": 2.67, + "learning_rate": 0.0008891195294931674, + "loss": 1.9274, + "step": 5140 + }, + { + "epoch": 2.67, + "learning_rate": 0.0008908493340252551, + "loss": 1.8263, + "step": 5150 + }, + { + "epoch": 2.68, + "learning_rate": 0.0008925791385573431, + "loss": 1.8235, + "step": 5160 + }, + { + "epoch": 2.68, + "learning_rate": 0.0008943089430894309, + "loss": 1.8786, + "step": 5170 + }, + { + "epoch": 2.69, + "learning_rate": 0.0008960387476215188, + "loss": 1.8952, + "step": 5180 + }, + { + "epoch": 2.69, + "learning_rate": 0.0008977685521536067, + "loss": 1.7448, + "step": 5190 + }, + { + "epoch": 2.7, + "learning_rate": 0.0008994983566856945, + "loss": 1.9518, + "step": 5200 + }, + { + "epoch": 2.7, + "learning_rate": 0.0009012281612177824, + "loss": 1.8636, + "step": 5210 + }, + { + "epoch": 2.71, + "learning_rate": 0.0009029579657498703, + "loss": 1.9309, + "step": 5220 + }, + { + "epoch": 2.71, + "learning_rate": 0.0009046877702819582, + "loss": 1.8714, + "step": 5230 + }, + { + "epoch": 2.72, + "learning_rate": 0.000906417574814046, + "loss": 1.7624, + "step": 5240 + }, + { + "epoch": 2.72, + "learning_rate": 0.0009081473793461339, + "loss": 1.9421, + "step": 5250 + }, + { + "epoch": 2.73, + "learning_rate": 0.0009098771838782218, + "loss": 1.9204, + "step": 5260 + }, + { + "epoch": 2.73, + "learning_rate": 0.0009116069884103097, + "loss": 1.8561, + "step": 5270 + }, + { + "epoch": 2.74, + "learning_rate": 0.0009133367929423976, + "loss": 1.8766, + "step": 5280 + }, + { + "epoch": 2.75, + "learning_rate": 0.0009150665974744854, + "loss": 1.8905, + "step": 5290 + }, + { + "epoch": 2.75, + "learning_rate": 0.0009167964020065733, + "loss": 1.8466, + "step": 5300 + }, + { + "epoch": 2.76, + "learning_rate": 0.0009185262065386612, + "loss": 1.8735, + "step": 5310 + }, + { + "epoch": 2.76, + "learning_rate": 0.0009202560110707491, + "loss": 2.0758, + "step": 5320 + }, + { + "epoch": 2.77, + "learning_rate": 0.0009219858156028368, + "loss": 1.9582, + "step": 5330 + }, + { + "epoch": 2.77, + "learning_rate": 0.0009237156201349248, + "loss": 1.9317, + "step": 5340 + }, + { + "epoch": 2.78, + "learning_rate": 0.0009254454246670126, + "loss": 1.8719, + "step": 5350 + }, + { + "epoch": 2.78, + "learning_rate": 0.0009271752291991005, + "loss": 1.8377, + "step": 5360 + }, + { + "epoch": 2.79, + "learning_rate": 0.0009289050337311884, + "loss": 1.9785, + "step": 5370 + }, + { + "epoch": 2.79, + "learning_rate": 0.0009306348382632762, + "loss": 1.9259, + "step": 5380 + }, + { + "epoch": 2.8, + "learning_rate": 0.0009323646427953641, + "loss": 1.9097, + "step": 5390 + }, + { + "epoch": 2.8, + "learning_rate": 0.000934094447327452, + "loss": 1.9034, + "step": 5400 + }, + { + "epoch": 2.81, + "learning_rate": 0.0009358242518595399, + "loss": 1.8983, + "step": 5410 + }, + { + "epoch": 2.81, + "learning_rate": 0.0009375540563916278, + "loss": 1.8115, + "step": 5420 + }, + { + "epoch": 2.82, + "learning_rate": 0.0009392838609237156, + "loss": 1.8606, + "step": 5430 + }, + { + "epoch": 2.82, + "learning_rate": 0.0009410136654558035, + "loss": 1.8883, + "step": 5440 + }, + { + "epoch": 2.83, + "learning_rate": 0.0009427434699878914, + "loss": 1.7868, + "step": 5450 + }, + { + "epoch": 2.83, + "learning_rate": 0.0009444732745199793, + "loss": 1.9107, + "step": 5460 + }, + { + "epoch": 2.84, + "learning_rate": 0.0009462030790520671, + "loss": 1.9427, + "step": 5470 + }, + { + "epoch": 2.84, + "learning_rate": 0.000947932883584155, + "loss": 1.8159, + "step": 5480 + }, + { + "epoch": 2.85, + "learning_rate": 0.0009496626881162429, + "loss": 1.909, + "step": 5490 + }, + { + "epoch": 2.85, + "learning_rate": 0.0009513924926483307, + "loss": 1.8716, + "step": 5500 + }, + { + "epoch": 2.86, + "learning_rate": 0.0009531222971804187, + "loss": 1.9116, + "step": 5510 + }, + { + "epoch": 2.86, + "learning_rate": 0.0009548521017125064, + "loss": 1.8846, + "step": 5520 + }, + { + "epoch": 2.87, + "learning_rate": 0.0009565819062445944, + "loss": 1.8776, + "step": 5530 + }, + { + "epoch": 2.87, + "learning_rate": 0.0009583117107766822, + "loss": 1.828, + "step": 5540 + }, + { + "epoch": 2.88, + "learning_rate": 0.0009600415153087702, + "loss": 1.9392, + "step": 5550 + }, + { + "epoch": 2.89, + "learning_rate": 0.0009617713198408579, + "loss": 1.8823, + "step": 5560 + }, + { + "epoch": 2.89, + "learning_rate": 0.0009635011243729459, + "loss": 1.8782, + "step": 5570 + }, + { + "epoch": 2.9, + "learning_rate": 0.0009652309289050337, + "loss": 1.8898, + "step": 5580 + }, + { + "epoch": 2.9, + "learning_rate": 0.0009669607334371217, + "loss": 1.8107, + "step": 5590 + }, + { + "epoch": 2.91, + "learning_rate": 0.0009686905379692095, + "loss": 1.8026, + "step": 5600 + }, + { + "epoch": 2.91, + "learning_rate": 0.0009704203425012974, + "loss": 1.8616, + "step": 5610 + }, + { + "epoch": 2.92, + "learning_rate": 0.0009721501470333852, + "loss": 1.8443, + "step": 5620 + }, + { + "epoch": 2.92, + "learning_rate": 0.0009738799515654732, + "loss": 1.8465, + "step": 5630 + }, + { + "epoch": 2.93, + "learning_rate": 0.000975609756097561, + "loss": 1.8806, + "step": 5640 + }, + { + "epoch": 2.93, + "learning_rate": 0.0009773395606296489, + "loss": 1.8441, + "step": 5650 + }, + { + "epoch": 2.94, + "learning_rate": 0.0009790693651617367, + "loss": 1.8194, + "step": 5660 + }, + { + "epoch": 2.94, + "learning_rate": 0.0009807991696938246, + "loss": 1.9228, + "step": 5670 + }, + { + "epoch": 2.95, + "learning_rate": 0.0009825289742259125, + "loss": 1.792, + "step": 5680 + }, + { + "epoch": 2.95, + "learning_rate": 0.0009842587787580004, + "loss": 1.8211, + "step": 5690 + }, + { + "epoch": 2.96, + "learning_rate": 0.0009859885832900882, + "loss": 1.81, + "step": 5700 + }, + { + "epoch": 2.96, + "learning_rate": 0.0009877183878221761, + "loss": 1.8862, + "step": 5710 + }, + { + "epoch": 2.97, + "learning_rate": 0.000989448192354264, + "loss": 1.8373, + "step": 5720 + }, + { + "epoch": 2.97, + "learning_rate": 0.0009911779968863519, + "loss": 1.8639, + "step": 5730 + }, + { + "epoch": 2.98, + "learning_rate": 0.0009929078014184398, + "loss": 1.8718, + "step": 5740 + }, + { + "epoch": 2.98, + "learning_rate": 0.0009946376059505276, + "loss": 1.8258, + "step": 5750 + }, + { + "epoch": 2.99, + "learning_rate": 0.0009963674104826155, + "loss": 1.8564, + "step": 5760 + }, + { + "epoch": 2.99, + "learning_rate": 0.0009980972150147034, + "loss": 1.9462, + "step": 5770 + }, + { + "epoch": 3.0, + "learning_rate": 0.0009998270195467913, + "loss": 1.8437, + "step": 5780 + }, + { + "epoch": 3.0, + "eval_accuracy": { + "accuracy": 0.1743190661478599 + }, + "eval_f1": { + "f1": 0.1111482743173735 + }, + "eval_loss": 1.9242523908615112, + "eval_precision": { + "precision": 0.1516816437642854 + }, + "eval_recall": { + "recall": 0.1887752678966561 + }, + "eval_runtime": 118.0197, + "eval_samples_per_second": 32.664, + "eval_steps_per_second": 16.336, + "step": 5781 + }, + { + "epoch": 3.0, + "learning_rate": 0.000999999926169841, + "loss": 1.8482, + "step": 5790 + }, + { + "epoch": 3.01, + "learning_rate": 0.0009999996709545039, + "loss": 1.8564, + "step": 5800 + }, + { + "epoch": 3.02, + "learning_rate": 0.0009999992334425987, + "loss": 1.8474, + "step": 5810 + }, + { + "epoch": 3.02, + "learning_rate": 0.0009999986136342847, + "loss": 1.8997, + "step": 5820 + }, + { + "epoch": 3.03, + "learning_rate": 0.000999997811529788, + "loss": 1.8401, + "step": 5830 + }, + { + "epoch": 3.03, + "learning_rate": 0.000999996827129401, + "loss": 1.8567, + "step": 5840 + }, + { + "epoch": 3.04, + "learning_rate": 0.0009999956604334825, + "loss": 1.9804, + "step": 5850 + }, + { + "epoch": 3.04, + "learning_rate": 0.000999994311442458, + "loss": 1.8427, + "step": 5860 + }, + { + "epoch": 3.05, + "learning_rate": 0.0009999927801568192, + "loss": 1.8681, + "step": 5870 + }, + { + "epoch": 3.05, + "learning_rate": 0.0009999910665771247, + "loss": 1.8561, + "step": 5880 + }, + { + "epoch": 3.06, + "learning_rate": 0.000999989170703999, + "loss": 1.9098, + "step": 5890 + }, + { + "epoch": 3.06, + "learning_rate": 0.0009999870925381333, + "loss": 1.935, + "step": 5900 + }, + { + "epoch": 3.07, + "learning_rate": 0.0009999848320802853, + "loss": 2.0045, + "step": 5910 + }, + { + "epoch": 3.07, + "learning_rate": 0.0009999823893312794, + "loss": 1.9466, + "step": 5920 + }, + { + "epoch": 3.08, + "learning_rate": 0.0009999797642920058, + "loss": 1.9047, + "step": 5930 + }, + { + "epoch": 3.08, + "learning_rate": 0.0009999769569634218, + "loss": 1.9901, + "step": 5940 + }, + { + "epoch": 3.09, + "learning_rate": 0.000999973967346551, + "loss": 1.9159, + "step": 5950 + }, + { + "epoch": 3.09, + "learning_rate": 0.0009999707954424836, + "loss": 1.9826, + "step": 5960 + }, + { + "epoch": 3.1, + "learning_rate": 0.0009999674412523754, + "loss": 1.9556, + "step": 5970 + }, + { + "epoch": 3.1, + "learning_rate": 0.0009999639047774498, + "loss": 1.9591, + "step": 5980 + }, + { + "epoch": 3.11, + "learning_rate": 0.000999960186018996, + "loss": 1.9551, + "step": 5990 + }, + { + "epoch": 3.11, + "learning_rate": 0.0009999562849783699, + "loss": 1.937, + "step": 6000 + }, + { + "epoch": 3.12, + "learning_rate": 0.0009999522016569938, + "loss": 1.9167, + "step": 6010 + }, + { + "epoch": 3.12, + "learning_rate": 0.0009999479360563565, + "loss": 1.9296, + "step": 6020 + }, + { + "epoch": 3.13, + "learning_rate": 0.000999943488178013, + "loss": 1.9602, + "step": 6030 + }, + { + "epoch": 3.13, + "learning_rate": 0.0009999388580235853, + "loss": 1.9229, + "step": 6040 + }, + { + "epoch": 3.14, + "learning_rate": 0.0009999340455947613, + "loss": 1.9508, + "step": 6050 + }, + { + "epoch": 3.14, + "learning_rate": 0.0009999290508932957, + "loss": 1.9451, + "step": 6060 + }, + { + "epoch": 3.15, + "learning_rate": 0.0009999238739210093, + "loss": 1.9379, + "step": 6070 + }, + { + "epoch": 3.16, + "learning_rate": 0.00099991851467979, + "loss": 1.9622, + "step": 6080 + }, + { + "epoch": 3.16, + "learning_rate": 0.0009999129731715912, + "loss": 1.9257, + "step": 6090 + }, + { + "epoch": 3.17, + "learning_rate": 0.0009999072493984337, + "loss": 1.936, + "step": 6100 + }, + { + "epoch": 3.17, + "learning_rate": 0.0009999013433624041, + "loss": 1.939, + "step": 6110 + }, + { + "epoch": 3.18, + "learning_rate": 0.000999895255065656, + "loss": 1.9445, + "step": 6120 + }, + { + "epoch": 3.18, + "learning_rate": 0.0009998889845104092, + "loss": 1.945, + "step": 6130 + }, + { + "epoch": 3.19, + "learning_rate": 0.0009998825316989493, + "loss": 1.989, + "step": 6140 + }, + { + "epoch": 3.19, + "learning_rate": 0.0009998758966336297, + "loss": 1.9469, + "step": 6150 + }, + { + "epoch": 3.2, + "learning_rate": 0.0009998690793168688, + "loss": 1.9297, + "step": 6160 + }, + { + "epoch": 3.2, + "learning_rate": 0.0009998620797511525, + "loss": 1.9461, + "step": 6170 + }, + { + "epoch": 3.21, + "learning_rate": 0.0009998548979390331, + "loss": 1.9486, + "step": 6180 + }, + { + "epoch": 3.21, + "learning_rate": 0.0009998475338831286, + "loss": 1.9473, + "step": 6190 + }, + { + "epoch": 3.22, + "learning_rate": 0.000999839987586124, + "loss": 1.9557, + "step": 6200 + }, + { + "epoch": 3.22, + "learning_rate": 0.0009998322590507706, + "loss": 1.9447, + "step": 6210 + }, + { + "epoch": 3.23, + "learning_rate": 0.0009998243482798863, + "loss": 1.9628, + "step": 6220 + }, + { + "epoch": 3.23, + "learning_rate": 0.0009998162552763551, + "loss": 1.9514, + "step": 6230 + }, + { + "epoch": 3.24, + "learning_rate": 0.0009998079800431282, + "loss": 1.9163, + "step": 6240 + }, + { + "epoch": 3.24, + "learning_rate": 0.000999799522583222, + "loss": 1.9278, + "step": 6250 + }, + { + "epoch": 3.25, + "learning_rate": 0.00099979088289972, + "loss": 1.9194, + "step": 6260 + }, + { + "epoch": 3.25, + "learning_rate": 0.0009997820609957732, + "loss": 1.9528, + "step": 6270 + }, + { + "epoch": 3.26, + "learning_rate": 0.0009997730568745966, + "loss": 1.9465, + "step": 6280 + }, + { + "epoch": 3.26, + "learning_rate": 0.0009997638705394741, + "loss": 1.9105, + "step": 6290 + }, + { + "epoch": 3.27, + "learning_rate": 0.0009997545019937546, + "loss": 1.9432, + "step": 6300 + }, + { + "epoch": 3.27, + "learning_rate": 0.0009997449512408538, + "loss": 1.9348, + "step": 6310 + }, + { + "epoch": 3.28, + "learning_rate": 0.0009997352182842538, + "loss": 1.9364, + "step": 6320 + }, + { + "epoch": 3.28, + "learning_rate": 0.0009997253031275035, + "loss": 1.9734, + "step": 6330 + }, + { + "epoch": 3.29, + "learning_rate": 0.0009997152057742173, + "loss": 1.9606, + "step": 6340 + }, + { + "epoch": 3.3, + "learning_rate": 0.0009997049262280772, + "loss": 1.9483, + "step": 6350 + }, + { + "epoch": 3.3, + "learning_rate": 0.0009996944644928307, + "loss": 1.9196, + "step": 6360 + }, + { + "epoch": 3.31, + "learning_rate": 0.0009996838205722923, + "loss": 1.9586, + "step": 6370 + }, + { + "epoch": 3.31, + "learning_rate": 0.0009996729944703427, + "loss": 1.9599, + "step": 6380 + }, + { + "epoch": 3.32, + "learning_rate": 0.0009996619861909288, + "loss": 1.9236, + "step": 6390 + }, + { + "epoch": 3.32, + "learning_rate": 0.0009996507957380643, + "loss": 1.9563, + "step": 6400 + }, + { + "epoch": 3.33, + "learning_rate": 0.000999639423115829, + "loss": 1.9324, + "step": 6410 + }, + { + "epoch": 3.33, + "learning_rate": 0.0009996278683283696, + "loss": 1.9386, + "step": 6420 + }, + { + "epoch": 3.34, + "learning_rate": 0.0009996161313798985, + "loss": 1.9506, + "step": 6430 + }, + { + "epoch": 3.34, + "learning_rate": 0.0009996042122746955, + "loss": 1.9184, + "step": 6440 + }, + { + "epoch": 3.35, + "learning_rate": 0.0009995921110171058, + "loss": 1.955, + "step": 6450 + }, + { + "epoch": 3.35, + "learning_rate": 0.0009995798276115414, + "loss": 1.9215, + "step": 6460 + }, + { + "epoch": 3.36, + "learning_rate": 0.0009995673620624807, + "loss": 1.9362, + "step": 6470 + }, + { + "epoch": 3.36, + "learning_rate": 0.0009995547143744688, + "loss": 1.939, + "step": 6480 + }, + { + "epoch": 3.37, + "learning_rate": 0.000999541884552117, + "loss": 1.8658, + "step": 6490 + }, + { + "epoch": 3.37, + "learning_rate": 0.0009995288726001028, + "loss": 1.9591, + "step": 6500 + }, + { + "epoch": 3.38, + "learning_rate": 0.0009995156785231702, + "loss": 1.9814, + "step": 6510 + }, + { + "epoch": 3.38, + "learning_rate": 0.0009995023023261298, + "loss": 1.9623, + "step": 6520 + }, + { + "epoch": 3.39, + "learning_rate": 0.0009994887440138588, + "loss": 1.9069, + "step": 6530 + }, + { + "epoch": 3.39, + "learning_rate": 0.0009994750035912997, + "loss": 1.8249, + "step": 6540 + }, + { + "epoch": 3.4, + "learning_rate": 0.0009994610810634627, + "loss": 1.8952, + "step": 6550 + }, + { + "epoch": 3.4, + "learning_rate": 0.000999446976435424, + "loss": 1.9932, + "step": 6560 + }, + { + "epoch": 3.41, + "learning_rate": 0.0009994326897123256, + "loss": 1.8771, + "step": 6570 + }, + { + "epoch": 3.41, + "learning_rate": 0.0009994182208993764, + "loss": 2.0021, + "step": 6580 + }, + { + "epoch": 3.42, + "learning_rate": 0.000999403570001852, + "loss": 1.9962, + "step": 6590 + }, + { + "epoch": 3.43, + "learning_rate": 0.0009993887370250939, + "loss": 1.9772, + "step": 6600 + }, + { + "epoch": 3.43, + "learning_rate": 0.00099937372197451, + "loss": 1.951, + "step": 6610 + }, + { + "epoch": 3.44, + "learning_rate": 0.0009993585248555745, + "loss": 1.9398, + "step": 6620 + }, + { + "epoch": 3.44, + "learning_rate": 0.0009993431456738284, + "loss": 1.9512, + "step": 6630 + }, + { + "epoch": 3.45, + "learning_rate": 0.000999327584434879, + "loss": 1.9954, + "step": 6640 + }, + { + "epoch": 3.45, + "learning_rate": 0.0009993118411443995, + "loss": 1.9426, + "step": 6650 + }, + { + "epoch": 3.46, + "learning_rate": 0.00099929591580813, + "loss": 1.9368, + "step": 6660 + }, + { + "epoch": 3.46, + "learning_rate": 0.0009992798084318766, + "loss": 1.9514, + "step": 6670 + }, + { + "epoch": 3.47, + "learning_rate": 0.0009992635190215122, + "loss": 1.9406, + "step": 6680 + }, + { + "epoch": 3.47, + "learning_rate": 0.0009992470475829755, + "loss": 1.9547, + "step": 6690 + }, + { + "epoch": 3.48, + "learning_rate": 0.0009992303941222722, + "loss": 1.9348, + "step": 6700 + }, + { + "epoch": 3.48, + "learning_rate": 0.000999213558645474, + "loss": 1.9717, + "step": 6710 + }, + { + "epoch": 3.49, + "learning_rate": 0.0009991965411587187, + "loss": 1.9505, + "step": 6720 + }, + { + "epoch": 3.49, + "learning_rate": 0.000999179341668211, + "loss": 1.925, + "step": 6730 + }, + { + "epoch": 3.5, + "learning_rate": 0.0009991619601802217, + "loss": 1.9599, + "step": 6740 + }, + { + "epoch": 3.5, + "learning_rate": 0.000999144396701088, + "loss": 1.9617, + "step": 6750 + }, + { + "epoch": 3.51, + "learning_rate": 0.0009991266512372132, + "loss": 1.9616, + "step": 6760 + }, + { + "epoch": 3.51, + "learning_rate": 0.0009991087237950676, + "loss": 1.9458, + "step": 6770 + }, + { + "epoch": 3.52, + "learning_rate": 0.0009990906143811868, + "loss": 1.9476, + "step": 6780 + }, + { + "epoch": 3.52, + "learning_rate": 0.0009990723230021742, + "loss": 1.919, + "step": 6790 + }, + { + "epoch": 3.53, + "learning_rate": 0.000999053849664698, + "loss": 1.9499, + "step": 6800 + }, + { + "epoch": 3.53, + "learning_rate": 0.000999035194375494, + "loss": 1.9444, + "step": 6810 + }, + { + "epoch": 3.54, + "learning_rate": 0.0009990163571413633, + "loss": 1.9498, + "step": 6820 + }, + { + "epoch": 3.54, + "learning_rate": 0.0009989973379691739, + "loss": 1.9729, + "step": 6830 + }, + { + "epoch": 3.55, + "learning_rate": 0.0009989781368658604, + "loss": 2.0205, + "step": 6840 + }, + { + "epoch": 3.55, + "learning_rate": 0.0009989587538384232, + "loss": 1.9512, + "step": 6850 + }, + { + "epoch": 3.56, + "learning_rate": 0.0009989391888939295, + "loss": 1.9329, + "step": 6860 + }, + { + "epoch": 3.57, + "learning_rate": 0.000998919442039512, + "loss": 1.9252, + "step": 6870 + }, + { + "epoch": 3.57, + "learning_rate": 0.0009988995132823705, + "loss": 1.9392, + "step": 6880 + }, + { + "epoch": 3.58, + "learning_rate": 0.0009988794026297711, + "loss": 1.9263, + "step": 6890 + }, + { + "epoch": 3.58, + "learning_rate": 0.000998859110089046, + "loss": 1.9332, + "step": 6900 + }, + { + "epoch": 3.59, + "learning_rate": 0.0009988386356675932, + "loss": 1.9138, + "step": 6910 + }, + { + "epoch": 3.59, + "learning_rate": 0.000998817979372878, + "loss": 1.9538, + "step": 6920 + }, + { + "epoch": 3.6, + "learning_rate": 0.0009987971412124318, + "loss": 1.9492, + "step": 6930 + }, + { + "epoch": 3.6, + "learning_rate": 0.0009987761211938514, + "loss": 1.9498, + "step": 6940 + }, + { + "epoch": 3.61, + "learning_rate": 0.000998754919324801, + "loss": 1.9517, + "step": 6950 + }, + { + "epoch": 3.61, + "learning_rate": 0.0009987335356130107, + "loss": 1.916, + "step": 6960 + }, + { + "epoch": 3.62, + "learning_rate": 0.0009987119700662767, + "loss": 1.9625, + "step": 6970 + }, + { + "epoch": 3.62, + "learning_rate": 0.0009986902226924615, + "loss": 1.9502, + "step": 6980 + }, + { + "epoch": 3.63, + "learning_rate": 0.000998668293499494, + "loss": 1.9368, + "step": 6990 + }, + { + "epoch": 3.63, + "learning_rate": 0.0009986461824953701, + "loss": 1.9518, + "step": 7000 + }, + { + "epoch": 3.64, + "learning_rate": 0.0009986238896881508, + "loss": 1.9244, + "step": 7010 + }, + { + "epoch": 3.64, + "learning_rate": 0.0009986014150859637, + "loss": 1.9348, + "step": 7020 + }, + { + "epoch": 3.65, + "learning_rate": 0.0009985787586970034, + "loss": 1.9646, + "step": 7030 + }, + { + "epoch": 3.65, + "learning_rate": 0.0009985559205295301, + "loss": 1.9216, + "step": 7040 + }, + { + "epoch": 3.66, + "learning_rate": 0.0009985329005918703, + "loss": 1.9065, + "step": 7050 + }, + { + "epoch": 3.66, + "learning_rate": 0.0009985096988924169, + "loss": 1.9527, + "step": 7060 + }, + { + "epoch": 3.67, + "learning_rate": 0.0009984863154396293, + "loss": 1.9143, + "step": 7070 + }, + { + "epoch": 3.67, + "learning_rate": 0.0009984627502420329, + "loss": 1.9082, + "step": 7080 + }, + { + "epoch": 3.68, + "learning_rate": 0.0009984390033082194, + "loss": 1.9009, + "step": 7090 + }, + { + "epoch": 3.68, + "learning_rate": 0.0009984150746468465, + "loss": 1.766, + "step": 7100 + }, + { + "epoch": 3.69, + "learning_rate": 0.0009983909642666388, + "loss": 1.8211, + "step": 7110 + }, + { + "epoch": 3.69, + "learning_rate": 0.000998366672176387, + "loss": 1.8446, + "step": 7120 + }, + { + "epoch": 3.7, + "learning_rate": 0.000998342198384947, + "loss": 1.8342, + "step": 7130 + }, + { + "epoch": 3.71, + "learning_rate": 0.0009983175429012425, + "loss": 1.8967, + "step": 7140 + }, + { + "epoch": 3.71, + "learning_rate": 0.0009982927057342623, + "loss": 1.8948, + "step": 7150 + }, + { + "epoch": 3.72, + "learning_rate": 0.0009982676868930623, + "loss": 1.9191, + "step": 7160 + }, + { + "epoch": 3.72, + "learning_rate": 0.0009982424863867639, + "loss": 1.9185, + "step": 7170 + }, + { + "epoch": 3.73, + "learning_rate": 0.0009982171042245552, + "loss": 1.8958, + "step": 7180 + }, + { + "epoch": 3.73, + "learning_rate": 0.0009981915404156901, + "loss": 1.9371, + "step": 7190 + }, + { + "epoch": 3.74, + "learning_rate": 0.0009981657949694892, + "loss": 1.8752, + "step": 7200 + }, + { + "epoch": 3.74, + "learning_rate": 0.000998139867895339, + "loss": 1.8434, + "step": 7210 + }, + { + "epoch": 3.75, + "learning_rate": 0.0009981137592026926, + "loss": 1.8518, + "step": 7220 + }, + { + "epoch": 3.75, + "learning_rate": 0.0009980874689010687, + "loss": 1.7907, + "step": 7230 + }, + { + "epoch": 3.76, + "learning_rate": 0.000998060997000053, + "loss": 1.8275, + "step": 7240 + }, + { + "epoch": 3.76, + "learning_rate": 0.0009980343435092966, + "loss": 1.9524, + "step": 7250 + }, + { + "epoch": 3.77, + "learning_rate": 0.0009980075084385174, + "loss": 1.8303, + "step": 7260 + }, + { + "epoch": 3.77, + "learning_rate": 0.000997980491797499, + "loss": 1.8564, + "step": 7270 + }, + { + "epoch": 3.78, + "learning_rate": 0.000997953293596092, + "loss": 1.8884, + "step": 7280 + }, + { + "epoch": 3.78, + "learning_rate": 0.000997925913844212, + "loss": 1.862, + "step": 7290 + }, + { + "epoch": 3.79, + "learning_rate": 0.0009978983525518422, + "loss": 1.8021, + "step": 7300 + }, + { + "epoch": 3.79, + "learning_rate": 0.0009978706097290308, + "loss": 1.9126, + "step": 7310 + }, + { + "epoch": 3.8, + "learning_rate": 0.0009978426853858929, + "loss": 1.9132, + "step": 7320 + }, + { + "epoch": 3.8, + "learning_rate": 0.0009978145795326093, + "loss": 1.8305, + "step": 7330 + }, + { + "epoch": 3.81, + "learning_rate": 0.0009977862921794272, + "loss": 1.871, + "step": 7340 + }, + { + "epoch": 3.81, + "learning_rate": 0.0009977578233366603, + "loss": 1.8928, + "step": 7350 + }, + { + "epoch": 3.82, + "learning_rate": 0.0009977291730146876, + "loss": 1.8817, + "step": 7360 + }, + { + "epoch": 3.82, + "learning_rate": 0.0009977003412239556, + "loss": 1.8307, + "step": 7370 + }, + { + "epoch": 3.83, + "learning_rate": 0.0009976713279749754, + "loss": 1.8866, + "step": 7380 + }, + { + "epoch": 3.83, + "learning_rate": 0.0009976421332783256, + "loss": 1.8442, + "step": 7390 + }, + { + "epoch": 3.84, + "learning_rate": 0.00099761275714465, + "loss": 1.8367, + "step": 7400 + }, + { + "epoch": 3.85, + "learning_rate": 0.0009975831995846591, + "loss": 1.8936, + "step": 7410 + }, + { + "epoch": 3.85, + "learning_rate": 0.0009975534606091296, + "loss": 1.807, + "step": 7420 + }, + { + "epoch": 3.86, + "learning_rate": 0.0009975235402289039, + "loss": 1.8231, + "step": 7430 + }, + { + "epoch": 3.86, + "learning_rate": 0.0009974934384548906, + "loss": 1.8122, + "step": 7440 + }, + { + "epoch": 3.87, + "learning_rate": 0.000997463155298065, + "loss": 1.8028, + "step": 7450 + }, + { + "epoch": 3.87, + "learning_rate": 0.000997432690769468, + "loss": 1.8177, + "step": 7460 + }, + { + "epoch": 3.88, + "learning_rate": 0.0009974020448802066, + "loss": 1.9069, + "step": 7470 + }, + { + "epoch": 3.88, + "learning_rate": 0.0009973712176414543, + "loss": 1.8245, + "step": 7480 + }, + { + "epoch": 3.89, + "learning_rate": 0.0009973402090644504, + "loss": 1.8876, + "step": 7490 + }, + { + "epoch": 3.89, + "learning_rate": 0.0009973090191605005, + "loss": 1.8971, + "step": 7500 + }, + { + "epoch": 3.9, + "learning_rate": 0.000997277647940976, + "loss": 1.7917, + "step": 7510 + }, + { + "epoch": 3.9, + "learning_rate": 0.000997246095417315, + "loss": 1.8608, + "step": 7520 + }, + { + "epoch": 3.91, + "learning_rate": 0.000997214361601021, + "loss": 1.7863, + "step": 7530 + }, + { + "epoch": 3.91, + "learning_rate": 0.0009971824465036642, + "loss": 1.8487, + "step": 7540 + }, + { + "epoch": 3.92, + "learning_rate": 0.0009971503501368805, + "loss": 1.8698, + "step": 7550 + }, + { + "epoch": 3.92, + "learning_rate": 0.0009971180725123722, + "loss": 1.8424, + "step": 7560 + }, + { + "epoch": 3.93, + "learning_rate": 0.000997085613641907, + "loss": 1.9235, + "step": 7570 + }, + { + "epoch": 3.93, + "learning_rate": 0.0009970529735373198, + "loss": 1.7802, + "step": 7580 + }, + { + "epoch": 3.94, + "learning_rate": 0.0009970201522105106, + "loss": 1.8938, + "step": 7590 + }, + { + "epoch": 3.94, + "learning_rate": 0.000996987149673446, + "loss": 1.7626, + "step": 7600 + }, + { + "epoch": 3.95, + "learning_rate": 0.0009969539659381583, + "loss": 1.8598, + "step": 7610 + }, + { + "epoch": 3.95, + "learning_rate": 0.0009969206010167465, + "loss": 1.8503, + "step": 7620 + }, + { + "epoch": 3.96, + "learning_rate": 0.0009968870549213749, + "loss": 1.9692, + "step": 7630 + }, + { + "epoch": 3.96, + "learning_rate": 0.0009968533276642742, + "loss": 1.8035, + "step": 7640 + }, + { + "epoch": 3.97, + "learning_rate": 0.000996819419257741, + "loss": 1.8945, + "step": 7650 + }, + { + "epoch": 3.98, + "learning_rate": 0.0009967853297141384, + "loss": 1.8432, + "step": 7660 + }, + { + "epoch": 3.98, + "learning_rate": 0.0009967510590458953, + "loss": 1.8781, + "step": 7670 + }, + { + "epoch": 3.99, + "learning_rate": 0.000996716607265506, + "loss": 1.8669, + "step": 7680 + }, + { + "epoch": 3.99, + "learning_rate": 0.0009966819743855318, + "loss": 1.8776, + "step": 7690 + }, + { + "epoch": 4.0, + "learning_rate": 0.0009966471604185997, + "loss": 1.803, + "step": 7700 + }, + { + "epoch": 4.0, + "eval_accuracy": { + "accuracy": 0.1846952010376135 + }, + "eval_f1": { + "f1": 0.1316379146314846 + }, + "eval_loss": 1.9003419876098633, + "eval_precision": { + "precision": 0.1460659091286986 + }, + "eval_recall": { + "recall": 0.1928584239028949 + }, + "eval_runtime": 112.0693, + "eval_samples_per_second": 34.398, + "eval_steps_per_second": 17.204, + "step": 7708 + }, + { + "epoch": 4.0, + "learning_rate": 0.0009966121653774023, + "loss": 1.8044, + "step": 7710 + }, + { + "epoch": 4.01, + "learning_rate": 0.000996576989274699, + "loss": 1.856, + "step": 7720 + }, + { + "epoch": 4.01, + "learning_rate": 0.0009965416321233143, + "loss": 1.7691, + "step": 7730 + }, + { + "epoch": 4.02, + "learning_rate": 0.0009965060939361393, + "loss": 1.8381, + "step": 7740 + }, + { + "epoch": 4.02, + "learning_rate": 0.0009964703747261312, + "loss": 1.9571, + "step": 7750 + }, + { + "epoch": 4.03, + "learning_rate": 0.000996434474506313, + "loss": 1.8362, + "step": 7760 + }, + { + "epoch": 4.03, + "learning_rate": 0.0009963983932897735, + "loss": 1.776, + "step": 7770 + }, + { + "epoch": 4.04, + "learning_rate": 0.0009963621310896678, + "loss": 1.8468, + "step": 7780 + }, + { + "epoch": 4.04, + "learning_rate": 0.0009963256879192165, + "loss": 1.7727, + "step": 7790 + }, + { + "epoch": 4.05, + "learning_rate": 0.0009962890637917072, + "loss": 1.8783, + "step": 7800 + }, + { + "epoch": 4.05, + "learning_rate": 0.0009962522587204923, + "loss": 1.8325, + "step": 7810 + }, + { + "epoch": 4.06, + "learning_rate": 0.0009962152727189908, + "loss": 1.8977, + "step": 7820 + }, + { + "epoch": 4.06, + "learning_rate": 0.0009961781058006876, + "loss": 1.7941, + "step": 7830 + }, + { + "epoch": 4.07, + "learning_rate": 0.0009961407579791334, + "loss": 1.7944, + "step": 7840 + }, + { + "epoch": 4.07, + "learning_rate": 0.0009961032292679453, + "loss": 1.7098, + "step": 7850 + }, + { + "epoch": 4.08, + "learning_rate": 0.0009960655196808055, + "loss": 1.9654, + "step": 7860 + }, + { + "epoch": 4.08, + "learning_rate": 0.0009960276292314632, + "loss": 1.755, + "step": 7870 + }, + { + "epoch": 4.09, + "learning_rate": 0.0009959895579337325, + "loss": 1.7779, + "step": 7880 + }, + { + "epoch": 4.09, + "learning_rate": 0.0009959513058014945, + "loss": 2.0262, + "step": 7890 + }, + { + "epoch": 4.1, + "learning_rate": 0.000995912872848695, + "loss": 1.9572, + "step": 7900 + }, + { + "epoch": 4.1, + "learning_rate": 0.000995874259089347, + "loss": 1.7426, + "step": 7910 + }, + { + "epoch": 4.11, + "learning_rate": 0.0009958354645375287, + "loss": 1.7812, + "step": 7920 + }, + { + "epoch": 4.12, + "learning_rate": 0.000995796489207384, + "loss": 1.8882, + "step": 7930 + }, + { + "epoch": 4.12, + "learning_rate": 0.0009957573331131236, + "loss": 1.8841, + "step": 7940 + }, + { + "epoch": 4.13, + "learning_rate": 0.000995717996269023, + "loss": 1.8111, + "step": 7950 + }, + { + "epoch": 4.13, + "learning_rate": 0.0009956784786894243, + "loss": 1.7847, + "step": 7960 + }, + { + "epoch": 4.14, + "learning_rate": 0.0009956387803887355, + "loss": 1.8502, + "step": 7970 + }, + { + "epoch": 4.14, + "learning_rate": 0.0009955989013814303, + "loss": 1.8171, + "step": 7980 + }, + { + "epoch": 4.15, + "learning_rate": 0.0009955588416820482, + "loss": 1.9437, + "step": 7990 + }, + { + "epoch": 4.15, + "learning_rate": 0.0009955186013051948, + "loss": 1.8288, + "step": 8000 + }, + { + "epoch": 4.16, + "learning_rate": 0.0009954781802655418, + "loss": 1.7936, + "step": 8010 + }, + { + "epoch": 4.16, + "learning_rate": 0.0009954375785778257, + "loss": 1.7904, + "step": 8020 + }, + { + "epoch": 4.17, + "learning_rate": 0.0009953967962568501, + "loss": 1.743, + "step": 8030 + }, + { + "epoch": 4.17, + "learning_rate": 0.0009953558333174839, + "loss": 1.7847, + "step": 8040 + }, + { + "epoch": 4.18, + "learning_rate": 0.0009953146897746618, + "loss": 1.8605, + "step": 8050 + }, + { + "epoch": 4.18, + "learning_rate": 0.0009952733656433848, + "loss": 1.7898, + "step": 8060 + }, + { + "epoch": 4.19, + "learning_rate": 0.000995231860938719, + "loss": 1.9566, + "step": 8070 + }, + { + "epoch": 4.19, + "learning_rate": 0.000995190175675797, + "loss": 1.8443, + "step": 8080 + }, + { + "epoch": 4.2, + "learning_rate": 0.0009951483098698168, + "loss": 1.8683, + "step": 8090 + }, + { + "epoch": 4.2, + "learning_rate": 0.0009951062635360425, + "loss": 1.786, + "step": 8100 + }, + { + "epoch": 4.21, + "learning_rate": 0.000995064036689804, + "loss": 1.8314, + "step": 8110 + }, + { + "epoch": 4.21, + "learning_rate": 0.0009950216293464966, + "loss": 1.8071, + "step": 8120 + }, + { + "epoch": 4.22, + "learning_rate": 0.000994979041521582, + "loss": 1.7505, + "step": 8130 + }, + { + "epoch": 4.22, + "learning_rate": 0.0009949362732305875, + "loss": 1.7527, + "step": 8140 + }, + { + "epoch": 4.23, + "learning_rate": 0.0009948933244891059, + "loss": 1.91, + "step": 8150 + }, + { + "epoch": 4.23, + "learning_rate": 0.0009948501953127961, + "loss": 1.7183, + "step": 8160 + }, + { + "epoch": 4.24, + "learning_rate": 0.0009948068857173829, + "loss": 1.9057, + "step": 8170 + }, + { + "epoch": 4.24, + "learning_rate": 0.0009947633957186567, + "loss": 1.7652, + "step": 8180 + }, + { + "epoch": 4.25, + "learning_rate": 0.0009947197253324731, + "loss": 1.8544, + "step": 8190 + }, + { + "epoch": 4.26, + "learning_rate": 0.0009946758745747548, + "loss": 1.9037, + "step": 8200 + }, + { + "epoch": 4.26, + "learning_rate": 0.000994631843461489, + "loss": 1.8158, + "step": 8210 + }, + { + "epoch": 4.27, + "learning_rate": 0.0009945876320087293, + "loss": 1.8054, + "step": 8220 + }, + { + "epoch": 4.27, + "learning_rate": 0.000994543240232595, + "loss": 1.9452, + "step": 8230 + }, + { + "epoch": 4.28, + "learning_rate": 0.0009944986681492708, + "loss": 1.8333, + "step": 8240 + }, + { + "epoch": 4.28, + "learning_rate": 0.0009944539157750077, + "loss": 1.7858, + "step": 8250 + }, + { + "epoch": 4.29, + "learning_rate": 0.0009944089831261218, + "loss": 1.8569, + "step": 8260 + }, + { + "epoch": 4.29, + "learning_rate": 0.0009943638702189954, + "loss": 1.9051, + "step": 8270 + }, + { + "epoch": 4.3, + "learning_rate": 0.0009943185770700767, + "loss": 1.8262, + "step": 8280 + }, + { + "epoch": 4.3, + "learning_rate": 0.0009942731036958784, + "loss": 1.7873, + "step": 8290 + }, + { + "epoch": 4.31, + "learning_rate": 0.0009942274501129808, + "loss": 1.9258, + "step": 8300 + }, + { + "epoch": 4.31, + "learning_rate": 0.0009941816163380284, + "loss": 1.7442, + "step": 8310 + }, + { + "epoch": 4.32, + "learning_rate": 0.0009941356023877317, + "loss": 1.7348, + "step": 8320 + }, + { + "epoch": 4.32, + "learning_rate": 0.0009940894082788674, + "loss": 1.8439, + "step": 8330 + }, + { + "epoch": 4.33, + "learning_rate": 0.0009940430340282776, + "loss": 1.9081, + "step": 8340 + }, + { + "epoch": 4.33, + "learning_rate": 0.0009939964796528698, + "loss": 1.8382, + "step": 8350 + }, + { + "epoch": 4.34, + "learning_rate": 0.0009939497451696175, + "loss": 1.754, + "step": 8360 + }, + { + "epoch": 4.34, + "learning_rate": 0.00099390283059556, + "loss": 1.7955, + "step": 8370 + }, + { + "epoch": 4.35, + "learning_rate": 0.0009938557359478017, + "loss": 1.6966, + "step": 8380 + }, + { + "epoch": 4.35, + "learning_rate": 0.000993808461243513, + "loss": 1.7841, + "step": 8390 + }, + { + "epoch": 4.36, + "learning_rate": 0.0009937610064999305, + "loss": 1.9377, + "step": 8400 + }, + { + "epoch": 4.36, + "learning_rate": 0.0009937133717343553, + "loss": 2.106, + "step": 8410 + }, + { + "epoch": 4.37, + "learning_rate": 0.0009936655569641548, + "loss": 1.874, + "step": 8420 + }, + { + "epoch": 4.37, + "learning_rate": 0.0009936175622067623, + "loss": 1.9231, + "step": 8430 + }, + { + "epoch": 4.38, + "learning_rate": 0.0009935693874796759, + "loss": 1.8391, + "step": 8440 + }, + { + "epoch": 4.39, + "learning_rate": 0.00099352103280046, + "loss": 1.8697, + "step": 8450 + }, + { + "epoch": 4.39, + "learning_rate": 0.0009934724981867447, + "loss": 1.8482, + "step": 8460 + }, + { + "epoch": 4.4, + "learning_rate": 0.0009934237836562248, + "loss": 1.7992, + "step": 8470 + }, + { + "epoch": 4.4, + "learning_rate": 0.000993374889226662, + "loss": 1.8635, + "step": 8480 + }, + { + "epoch": 4.41, + "learning_rate": 0.000993325814915882, + "loss": 1.9378, + "step": 8490 + }, + { + "epoch": 4.41, + "learning_rate": 0.0009932765607417778, + "loss": 1.7518, + "step": 8500 + }, + { + "epoch": 4.42, + "learning_rate": 0.0009932271267223064, + "loss": 1.8512, + "step": 8510 + }, + { + "epoch": 4.42, + "learning_rate": 0.000993177512875492, + "loss": 1.8, + "step": 8520 + }, + { + "epoch": 4.43, + "learning_rate": 0.0009931277192194228, + "loss": 1.9404, + "step": 8530 + }, + { + "epoch": 4.43, + "learning_rate": 0.0009930777457722534, + "loss": 1.8171, + "step": 8540 + }, + { + "epoch": 4.44, + "learning_rate": 0.000993027592552204, + "loss": 1.853, + "step": 8550 + }, + { + "epoch": 4.44, + "learning_rate": 0.0009929772595775597, + "loss": 1.7074, + "step": 8560 + }, + { + "epoch": 4.45, + "learning_rate": 0.000992926746866672, + "loss": 1.886, + "step": 8570 + }, + { + "epoch": 4.45, + "learning_rate": 0.0009928760544379571, + "loss": 1.786, + "step": 8580 + }, + { + "epoch": 4.46, + "learning_rate": 0.0009928251823098976, + "loss": 1.7788, + "step": 8590 + }, + { + "epoch": 4.46, + "learning_rate": 0.0009927741305010408, + "loss": 1.7721, + "step": 8600 + }, + { + "epoch": 4.47, + "learning_rate": 0.0009927228990299999, + "loss": 1.8738, + "step": 8610 + }, + { + "epoch": 4.47, + "learning_rate": 0.0009926714879154538, + "loss": 1.8784, + "step": 8620 + }, + { + "epoch": 4.48, + "learning_rate": 0.0009926198971761462, + "loss": 1.7806, + "step": 8630 + }, + { + "epoch": 4.48, + "learning_rate": 0.000992568126830887, + "loss": 1.7354, + "step": 8640 + }, + { + "epoch": 4.49, + "learning_rate": 0.0009925161768985515, + "loss": 1.8456, + "step": 8650 + }, + { + "epoch": 4.49, + "learning_rate": 0.00099246404739808, + "loss": 1.9295, + "step": 8660 + }, + { + "epoch": 4.5, + "learning_rate": 0.0009924117383484788, + "loss": 1.7736, + "step": 8670 + }, + { + "epoch": 4.5, + "learning_rate": 0.0009923592497688193, + "loss": 1.9097, + "step": 8680 + }, + { + "epoch": 4.51, + "learning_rate": 0.0009923065816782382, + "loss": 1.7617, + "step": 8690 + }, + { + "epoch": 4.51, + "learning_rate": 0.0009922537340959386, + "loss": 1.8676, + "step": 8700 + }, + { + "epoch": 4.52, + "learning_rate": 0.000992200707041188, + "loss": 1.8388, + "step": 8710 + }, + { + "epoch": 4.53, + "learning_rate": 0.0009921475005333195, + "loss": 1.8154, + "step": 8720 + }, + { + "epoch": 4.53, + "learning_rate": 0.0009920941145917322, + "loss": 1.9333, + "step": 8730 + }, + { + "epoch": 4.54, + "learning_rate": 0.00099204054923589, + "loss": 1.843, + "step": 8740 + }, + { + "epoch": 4.54, + "learning_rate": 0.0009919868044853228, + "loss": 1.9432, + "step": 8750 + }, + { + "epoch": 4.55, + "learning_rate": 0.0009919328803596255, + "loss": 1.7557, + "step": 8760 + }, + { + "epoch": 4.55, + "learning_rate": 0.0009918787768784581, + "loss": 1.805, + "step": 8770 + }, + { + "epoch": 4.56, + "learning_rate": 0.0009918244940615468, + "loss": 1.8165, + "step": 8780 + }, + { + "epoch": 4.56, + "learning_rate": 0.0009917700319286827, + "loss": 1.8896, + "step": 8790 + }, + { + "epoch": 4.57, + "learning_rate": 0.000991715390499722, + "loss": 1.8423, + "step": 8800 + }, + { + "epoch": 4.57, + "learning_rate": 0.000991660569794587, + "loss": 1.9545, + "step": 8810 + }, + { + "epoch": 4.58, + "learning_rate": 0.0009916055698332646, + "loss": 1.8081, + "step": 8820 + }, + { + "epoch": 4.58, + "learning_rate": 0.0009915503906358079, + "loss": 1.7959, + "step": 8830 + }, + { + "epoch": 4.59, + "learning_rate": 0.0009914950322223344, + "loss": 1.7619, + "step": 8840 + }, + { + "epoch": 4.59, + "learning_rate": 0.0009914394946130276, + "loss": 1.6856, + "step": 8850 + }, + { + "epoch": 4.6, + "learning_rate": 0.0009913837778281363, + "loss": 1.8222, + "step": 8860 + }, + { + "epoch": 4.6, + "learning_rate": 0.000991327881887974, + "loss": 1.7221, + "step": 8870 + }, + { + "epoch": 4.61, + "learning_rate": 0.0009912718068129207, + "loss": 1.7537, + "step": 8880 + }, + { + "epoch": 4.61, + "learning_rate": 0.0009912155526234203, + "loss": 1.7016, + "step": 8890 + }, + { + "epoch": 4.62, + "learning_rate": 0.0009911591193399831, + "loss": 1.84, + "step": 8900 + }, + { + "epoch": 4.62, + "learning_rate": 0.0009911025069831842, + "loss": 1.8604, + "step": 8910 + }, + { + "epoch": 4.63, + "learning_rate": 0.0009910457155736642, + "loss": 1.7591, + "step": 8920 + }, + { + "epoch": 4.63, + "learning_rate": 0.0009909887451321287, + "loss": 1.8894, + "step": 8930 + }, + { + "epoch": 4.64, + "learning_rate": 0.000990931595679349, + "loss": 1.8903, + "step": 8940 + }, + { + "epoch": 4.64, + "learning_rate": 0.000990874267236161, + "loss": 1.8954, + "step": 8950 + }, + { + "epoch": 4.65, + "learning_rate": 0.0009908167598234666, + "loss": 1.7949, + "step": 8960 + }, + { + "epoch": 4.65, + "learning_rate": 0.0009907590734622325, + "loss": 1.8333, + "step": 8970 + }, + { + "epoch": 4.66, + "learning_rate": 0.000990701208173491, + "loss": 1.8182, + "step": 8980 + }, + { + "epoch": 4.67, + "learning_rate": 0.000990643163978339, + "loss": 1.8232, + "step": 8990 + }, + { + "epoch": 4.67, + "learning_rate": 0.000990584940897939, + "loss": 1.6914, + "step": 9000 + }, + { + "epoch": 4.68, + "learning_rate": 0.0009905265389535192, + "loss": 1.8622, + "step": 9010 + }, + { + "epoch": 4.68, + "learning_rate": 0.0009904679581663723, + "loss": 1.8321, + "step": 9020 + }, + { + "epoch": 4.69, + "learning_rate": 0.0009904091985578564, + "loss": 1.8133, + "step": 9030 + }, + { + "epoch": 4.69, + "learning_rate": 0.0009903502601493952, + "loss": 1.825, + "step": 9040 + }, + { + "epoch": 4.7, + "learning_rate": 0.000990291142962477, + "loss": 1.8074, + "step": 9050 + }, + { + "epoch": 4.7, + "learning_rate": 0.0009902318470186554, + "loss": 1.7792, + "step": 9060 + }, + { + "epoch": 4.71, + "learning_rate": 0.0009901723723395495, + "loss": 1.8017, + "step": 9070 + }, + { + "epoch": 4.71, + "learning_rate": 0.0009901127189468433, + "loss": 1.8355, + "step": 9080 + }, + { + "epoch": 4.72, + "learning_rate": 0.0009900528868622862, + "loss": 1.7781, + "step": 9090 + }, + { + "epoch": 4.72, + "learning_rate": 0.0009899928761076923, + "loss": 1.7382, + "step": 9100 + }, + { + "epoch": 4.73, + "learning_rate": 0.0009899326867049414, + "loss": 1.7697, + "step": 9110 + }, + { + "epoch": 4.73, + "learning_rate": 0.000989872318675978, + "loss": 1.7791, + "step": 9120 + }, + { + "epoch": 4.74, + "learning_rate": 0.0009898117720428118, + "loss": 1.853, + "step": 9130 + }, + { + "epoch": 4.74, + "learning_rate": 0.0009897510468275179, + "loss": 1.8826, + "step": 9140 + }, + { + "epoch": 4.75, + "learning_rate": 0.0009896901430522362, + "loss": 1.9778, + "step": 9150 + }, + { + "epoch": 4.75, + "learning_rate": 0.0009896290607391718, + "loss": 1.9321, + "step": 9160 + }, + { + "epoch": 4.76, + "learning_rate": 0.000989567799910595, + "loss": 1.9429, + "step": 9170 + }, + { + "epoch": 4.76, + "learning_rate": 0.000989506360588841, + "loss": 1.9588, + "step": 9180 + }, + { + "epoch": 4.77, + "learning_rate": 0.0009894447427963103, + "loss": 1.9601, + "step": 9190 + }, + { + "epoch": 4.77, + "learning_rate": 0.0009893829465554681, + "loss": 1.9501, + "step": 9200 + }, + { + "epoch": 4.78, + "learning_rate": 0.0009893209718888454, + "loss": 1.9414, + "step": 9210 + }, + { + "epoch": 4.78, + "learning_rate": 0.0009892588188190371, + "loss": 2.0087, + "step": 9220 + }, + { + "epoch": 4.79, + "learning_rate": 0.0009891964873687041, + "loss": 1.9263, + "step": 9230 + }, + { + "epoch": 4.8, + "learning_rate": 0.000989133977560572, + "loss": 1.9342, + "step": 9240 + }, + { + "epoch": 4.8, + "learning_rate": 0.0009890712894174316, + "loss": 1.9505, + "step": 9250 + }, + { + "epoch": 4.81, + "learning_rate": 0.0009890084229621384, + "loss": 1.9658, + "step": 9260 + }, + { + "epoch": 4.81, + "learning_rate": 0.0009889453782176132, + "loss": 1.9293, + "step": 9270 + }, + { + "epoch": 4.82, + "learning_rate": 0.0009888821552068417, + "loss": 1.946, + "step": 9280 + }, + { + "epoch": 4.82, + "learning_rate": 0.0009888187539528746, + "loss": 2.0008, + "step": 9290 + }, + { + "epoch": 4.83, + "learning_rate": 0.0009887551744788272, + "loss": 1.9339, + "step": 9300 + }, + { + "epoch": 4.83, + "learning_rate": 0.0009886914168078808, + "loss": 1.9194, + "step": 9310 + }, + { + "epoch": 4.84, + "learning_rate": 0.0009886274809632807, + "loss": 1.9403, + "step": 9320 + }, + { + "epoch": 4.84, + "learning_rate": 0.0009885633669683375, + "loss": 1.9332, + "step": 9330 + }, + { + "epoch": 4.85, + "learning_rate": 0.0009884990748464267, + "loss": 1.9838, + "step": 9340 + }, + { + "epoch": 4.85, + "learning_rate": 0.0009884346046209887, + "loss": 1.9355, + "step": 9350 + }, + { + "epoch": 4.86, + "learning_rate": 0.0009883699563155292, + "loss": 1.9767, + "step": 9360 + }, + { + "epoch": 4.86, + "learning_rate": 0.0009883051299536183, + "loss": 1.9497, + "step": 9370 + }, + { + "epoch": 4.87, + "learning_rate": 0.0009882401255588912, + "loss": 1.9496, + "step": 9380 + }, + { + "epoch": 4.87, + "learning_rate": 0.0009881749431550485, + "loss": 1.9549, + "step": 9390 + }, + { + "epoch": 4.88, + "learning_rate": 0.000988109582765855, + "loss": 1.937, + "step": 9400 + }, + { + "epoch": 4.88, + "learning_rate": 0.0009880440444151404, + "loss": 1.9494, + "step": 9410 + }, + { + "epoch": 4.89, + "learning_rate": 0.0009879783281267999, + "loss": 1.9397, + "step": 9420 + }, + { + "epoch": 4.89, + "learning_rate": 0.000987912433924793, + "loss": 1.9257, + "step": 9430 + }, + { + "epoch": 4.9, + "learning_rate": 0.0009878463618331445, + "loss": 1.8816, + "step": 9440 + }, + { + "epoch": 4.9, + "learning_rate": 0.0009877801118759438, + "loss": 1.9802, + "step": 9450 + }, + { + "epoch": 4.91, + "learning_rate": 0.0009877136840773452, + "loss": 1.9163, + "step": 9460 + }, + { + "epoch": 4.91, + "learning_rate": 0.0009876470784615677, + "loss": 1.8752, + "step": 9470 + }, + { + "epoch": 4.92, + "learning_rate": 0.0009875802950528954, + "loss": 1.9372, + "step": 9480 + }, + { + "epoch": 4.92, + "learning_rate": 0.000987513333875677, + "loss": 1.8707, + "step": 9490 + }, + { + "epoch": 4.93, + "learning_rate": 0.0009874461949543262, + "loss": 1.7964, + "step": 9500 + }, + { + "epoch": 4.94, + "learning_rate": 0.0009873788783133213, + "loss": 1.8374, + "step": 9510 + }, + { + "epoch": 4.94, + "learning_rate": 0.0009873113839772058, + "loss": 1.7562, + "step": 9520 + }, + { + "epoch": 4.95, + "learning_rate": 0.0009872437119705871, + "loss": 1.7843, + "step": 9530 + }, + { + "epoch": 4.95, + "learning_rate": 0.0009871758623181384, + "loss": 1.7316, + "step": 9540 + }, + { + "epoch": 4.96, + "learning_rate": 0.0009871078350445971, + "loss": 1.8084, + "step": 9550 + }, + { + "epoch": 4.96, + "learning_rate": 0.0009870396301747658, + "loss": 1.8954, + "step": 9560 + }, + { + "epoch": 4.97, + "learning_rate": 0.000986971247733511, + "loss": 1.7907, + "step": 9570 + }, + { + "epoch": 4.97, + "learning_rate": 0.0009869026877457649, + "loss": 1.9981, + "step": 9580 + }, + { + "epoch": 4.98, + "learning_rate": 0.0009868339502365237, + "loss": 1.8354, + "step": 9590 + }, + { + "epoch": 4.98, + "learning_rate": 0.000986765035230849, + "loss": 1.8247, + "step": 9600 + }, + { + "epoch": 4.99, + "learning_rate": 0.0009866959427538666, + "loss": 1.7769, + "step": 9610 + }, + { + "epoch": 4.99, + "learning_rate": 0.000986626672830767, + "loss": 1.8071, + "step": 9620 + }, + { + "epoch": 5.0, + "learning_rate": 0.0009865572254868058, + "loss": 1.8121, + "step": 9630 + }, + { + "epoch": 5.0, + "eval_accuracy": { + "accuracy": 0.33229571984435796 + }, + "eval_f1": { + "f1": 0.24134878979912178 + }, + "eval_loss": 1.7385518550872803, + "eval_precision": { + "precision": 0.2476900084001273 + }, + "eval_recall": { + "recall": 0.3378007007551934 + }, + "eval_runtime": 102.5366, + "eval_samples_per_second": 37.596, + "eval_steps_per_second": 18.803, + "step": 9635 + }, + { + "epoch": 5.0, + "learning_rate": 0.000986487600747303, + "loss": 1.842, + "step": 9640 + }, + { + "epoch": 5.01, + "learning_rate": 0.0009864177986376428, + "loss": 1.6668, + "step": 9650 + }, + { + "epoch": 5.01, + "learning_rate": 0.0009863478191832755, + "loss": 1.7779, + "step": 9660 + }, + { + "epoch": 5.02, + "learning_rate": 0.0009862776624097146, + "loss": 1.9484, + "step": 9670 + }, + { + "epoch": 5.02, + "learning_rate": 0.0009862073283425387, + "loss": 1.7389, + "step": 9680 + }, + { + "epoch": 5.03, + "learning_rate": 0.0009861368170073915, + "loss": 1.7946, + "step": 9690 + }, + { + "epoch": 5.03, + "learning_rate": 0.0009860661284299807, + "loss": 1.9001, + "step": 9700 + }, + { + "epoch": 5.04, + "learning_rate": 0.000985995262636079, + "loss": 1.8473, + "step": 9710 + }, + { + "epoch": 5.04, + "learning_rate": 0.0009859242196515235, + "loss": 1.8383, + "step": 9720 + }, + { + "epoch": 5.05, + "learning_rate": 0.0009858529995022162, + "loss": 1.7071, + "step": 9730 + }, + { + "epoch": 5.05, + "learning_rate": 0.0009857816022141232, + "loss": 1.7298, + "step": 9740 + }, + { + "epoch": 5.06, + "learning_rate": 0.0009857100278132759, + "loss": 1.7207, + "step": 9750 + }, + { + "epoch": 5.06, + "learning_rate": 0.0009856382763257694, + "loss": 1.7967, + "step": 9760 + }, + { + "epoch": 5.07, + "learning_rate": 0.0009855663477777638, + "loss": 1.8998, + "step": 9770 + }, + { + "epoch": 5.08, + "learning_rate": 0.0009854942421954843, + "loss": 1.8676, + "step": 9780 + }, + { + "epoch": 5.08, + "learning_rate": 0.0009854219596052197, + "loss": 1.9189, + "step": 9790 + }, + { + "epoch": 5.09, + "learning_rate": 0.000985349500033324, + "loss": 1.7721, + "step": 9800 + }, + { + "epoch": 5.09, + "learning_rate": 0.000985276863506215, + "loss": 1.8258, + "step": 9810 + }, + { + "epoch": 5.1, + "learning_rate": 0.0009852040500503761, + "loss": 1.7623, + "step": 9820 + }, + { + "epoch": 5.1, + "learning_rate": 0.0009851310596923541, + "loss": 1.7993, + "step": 9830 + }, + { + "epoch": 5.11, + "learning_rate": 0.0009850578924587613, + "loss": 1.7903, + "step": 9840 + }, + { + "epoch": 5.11, + "learning_rate": 0.0009849845483762736, + "loss": 1.8658, + "step": 9850 + }, + { + "epoch": 5.12, + "learning_rate": 0.000984911027471632, + "loss": 1.8133, + "step": 9860 + }, + { + "epoch": 5.12, + "learning_rate": 0.0009848373297716413, + "loss": 1.842, + "step": 9870 + }, + { + "epoch": 5.13, + "learning_rate": 0.000984763455303172, + "loss": 1.7339, + "step": 9880 + }, + { + "epoch": 5.13, + "learning_rate": 0.0009846894040931573, + "loss": 1.7764, + "step": 9890 + }, + { + "epoch": 5.14, + "learning_rate": 0.0009846151761685965, + "loss": 1.832, + "step": 9900 + }, + { + "epoch": 5.14, + "learning_rate": 0.0009845407715565523, + "loss": 1.8243, + "step": 9910 + }, + { + "epoch": 5.15, + "learning_rate": 0.0009844661902841522, + "loss": 1.8842, + "step": 9920 + }, + { + "epoch": 5.15, + "learning_rate": 0.0009843914323785878, + "loss": 1.8539, + "step": 9930 + }, + { + "epoch": 5.16, + "learning_rate": 0.0009843164978671159, + "loss": 1.7264, + "step": 9940 + }, + { + "epoch": 5.16, + "learning_rate": 0.0009842413867770566, + "loss": 1.8868, + "step": 9950 + }, + { + "epoch": 5.17, + "learning_rate": 0.000984166099135795, + "loss": 1.7069, + "step": 9960 + }, + { + "epoch": 5.17, + "learning_rate": 0.0009840906349707808, + "loss": 1.7897, + "step": 9970 + }, + { + "epoch": 5.18, + "learning_rate": 0.0009840149943095274, + "loss": 1.7677, + "step": 9980 + }, + { + "epoch": 5.18, + "learning_rate": 0.0009839391771796129, + "loss": 1.643, + "step": 9990 + }, + { + "epoch": 5.19, + "learning_rate": 0.0009838631836086797, + "loss": 1.7819, + "step": 10000 + }, + { + "epoch": 5.19, + "learning_rate": 0.000983787013624435, + "loss": 1.8601, + "step": 10010 + }, + { + "epoch": 5.2, + "learning_rate": 0.0009837106672546492, + "loss": 1.7895, + "step": 10020 + }, + { + "epoch": 5.2, + "learning_rate": 0.0009836341445271582, + "loss": 1.8085, + "step": 10030 + }, + { + "epoch": 5.21, + "learning_rate": 0.0009835574454698612, + "loss": 1.787, + "step": 10040 + }, + { + "epoch": 5.22, + "learning_rate": 0.0009834805701107229, + "loss": 1.8519, + "step": 10050 + }, + { + "epoch": 5.22, + "learning_rate": 0.0009834035184777708, + "loss": 1.7236, + "step": 10060 + }, + { + "epoch": 5.23, + "learning_rate": 0.0009833262905990978, + "loss": 1.8543, + "step": 10070 + }, + { + "epoch": 5.23, + "learning_rate": 0.0009832488865028606, + "loss": 1.6843, + "step": 10080 + }, + { + "epoch": 5.24, + "learning_rate": 0.0009831713062172802, + "loss": 1.6678, + "step": 10090 + }, + { + "epoch": 5.24, + "learning_rate": 0.000983093549770642, + "loss": 1.9228, + "step": 10100 + }, + { + "epoch": 5.25, + "learning_rate": 0.000983015617191295, + "loss": 1.7595, + "step": 10110 + }, + { + "epoch": 5.25, + "learning_rate": 0.0009829375085076534, + "loss": 1.7518, + "step": 10120 + }, + { + "epoch": 5.26, + "learning_rate": 0.0009828592237481953, + "loss": 1.7682, + "step": 10130 + }, + { + "epoch": 5.26, + "learning_rate": 0.0009827807629414623, + "loss": 1.7051, + "step": 10140 + }, + { + "epoch": 5.27, + "learning_rate": 0.0009827021261160607, + "loss": 1.7019, + "step": 10150 + }, + { + "epoch": 5.27, + "learning_rate": 0.0009826233133006612, + "loss": 1.6837, + "step": 10160 + }, + { + "epoch": 5.28, + "learning_rate": 0.0009825443245239985, + "loss": 1.7594, + "step": 10170 + }, + { + "epoch": 5.28, + "learning_rate": 0.000982465159814871, + "loss": 1.867, + "step": 10180 + }, + { + "epoch": 5.29, + "learning_rate": 0.000982385819202142, + "loss": 1.6618, + "step": 10190 + }, + { + "epoch": 5.29, + "learning_rate": 0.0009823063027147383, + "loss": 1.6971, + "step": 10200 + }, + { + "epoch": 5.3, + "learning_rate": 0.0009822266103816516, + "loss": 1.9753, + "step": 10210 + }, + { + "epoch": 5.3, + "learning_rate": 0.0009821467422319364, + "loss": 1.7991, + "step": 10220 + }, + { + "epoch": 5.31, + "learning_rate": 0.0009820666982947126, + "loss": 1.8229, + "step": 10230 + }, + { + "epoch": 5.31, + "learning_rate": 0.0009819864785991638, + "loss": 1.8833, + "step": 10240 + }, + { + "epoch": 5.32, + "learning_rate": 0.0009819060831745372, + "loss": 1.8254, + "step": 10250 + }, + { + "epoch": 5.32, + "learning_rate": 0.0009818255120501447, + "loss": 1.7899, + "step": 10260 + }, + { + "epoch": 5.33, + "learning_rate": 0.0009817447652553618, + "loss": 1.7448, + "step": 10270 + }, + { + "epoch": 5.33, + "learning_rate": 0.0009816638428196286, + "loss": 1.7687, + "step": 10280 + }, + { + "epoch": 5.34, + "learning_rate": 0.0009815827447724483, + "loss": 1.7017, + "step": 10290 + }, + { + "epoch": 5.35, + "learning_rate": 0.0009815014711433891, + "loss": 1.6916, + "step": 10300 + }, + { + "epoch": 5.35, + "learning_rate": 0.000981420021962083, + "loss": 1.6695, + "step": 10310 + }, + { + "epoch": 5.36, + "learning_rate": 0.0009813383972582254, + "loss": 1.916, + "step": 10320 + }, + { + "epoch": 5.36, + "learning_rate": 0.0009812565970615766, + "loss": 1.8299, + "step": 10330 + }, + { + "epoch": 5.37, + "learning_rate": 0.0009811746214019599, + "loss": 1.7731, + "step": 10340 + }, + { + "epoch": 5.37, + "learning_rate": 0.0009810924703092634, + "loss": 1.8146, + "step": 10350 + }, + { + "epoch": 5.38, + "learning_rate": 0.0009810101438134388, + "loss": 1.794, + "step": 10360 + }, + { + "epoch": 5.38, + "learning_rate": 0.0009809276419445018, + "loss": 1.7443, + "step": 10370 + }, + { + "epoch": 5.39, + "learning_rate": 0.000980844964732532, + "loss": 1.6457, + "step": 10380 + }, + { + "epoch": 5.39, + "learning_rate": 0.0009807621122076729, + "loss": 1.7514, + "step": 10390 + }, + { + "epoch": 5.4, + "learning_rate": 0.000980679084400132, + "loss": 1.7748, + "step": 10400 + }, + { + "epoch": 5.4, + "learning_rate": 0.0009805958813401809, + "loss": 1.8627, + "step": 10410 + }, + { + "epoch": 5.41, + "learning_rate": 0.0009805125030581546, + "loss": 1.9099, + "step": 10420 + }, + { + "epoch": 5.41, + "learning_rate": 0.0009804289495844525, + "loss": 1.8258, + "step": 10430 + }, + { + "epoch": 5.42, + "learning_rate": 0.0009803452209495374, + "loss": 1.8714, + "step": 10440 + }, + { + "epoch": 5.42, + "learning_rate": 0.0009802613171839363, + "loss": 1.7998, + "step": 10450 + }, + { + "epoch": 5.43, + "learning_rate": 0.0009801772383182402, + "loss": 1.7625, + "step": 10460 + }, + { + "epoch": 5.43, + "learning_rate": 0.0009800929843831034, + "loss": 1.8561, + "step": 10470 + }, + { + "epoch": 5.44, + "learning_rate": 0.0009800085554092443, + "loss": 1.7429, + "step": 10480 + }, + { + "epoch": 5.44, + "learning_rate": 0.0009799239514274455, + "loss": 1.8117, + "step": 10490 + }, + { + "epoch": 5.45, + "learning_rate": 0.0009798391724685526, + "loss": 1.5906, + "step": 10500 + }, + { + "epoch": 5.45, + "learning_rate": 0.0009797542185634757, + "loss": 1.7367, + "step": 10510 + }, + { + "epoch": 5.46, + "learning_rate": 0.0009796690897431886, + "loss": 1.8763, + "step": 10520 + }, + { + "epoch": 5.46, + "learning_rate": 0.0009795837860387285, + "loss": 1.7159, + "step": 10530 + }, + { + "epoch": 5.47, + "learning_rate": 0.0009794983074811962, + "loss": 1.6452, + "step": 10540 + }, + { + "epoch": 5.47, + "learning_rate": 0.0009794126541017574, + "loss": 1.7642, + "step": 10550 + }, + { + "epoch": 5.48, + "learning_rate": 0.0009793268259316401, + "loss": 1.7403, + "step": 10560 + }, + { + "epoch": 5.49, + "learning_rate": 0.0009792408230021369, + "loss": 1.8482, + "step": 10570 + }, + { + "epoch": 5.49, + "learning_rate": 0.000979154645344604, + "loss": 1.8208, + "step": 10580 + }, + { + "epoch": 5.5, + "learning_rate": 0.0009790682929904611, + "loss": 1.8134, + "step": 10590 + }, + { + "epoch": 5.5, + "learning_rate": 0.0009789817659711917, + "loss": 1.7651, + "step": 10600 + }, + { + "epoch": 5.51, + "learning_rate": 0.000978895064318343, + "loss": 1.7863, + "step": 10610 + }, + { + "epoch": 5.51, + "learning_rate": 0.000978808188063526, + "loss": 1.7277, + "step": 10620 + }, + { + "epoch": 5.52, + "learning_rate": 0.0009787211372384147, + "loss": 1.777, + "step": 10630 + }, + { + "epoch": 5.52, + "learning_rate": 0.0009786339118747478, + "loss": 1.7441, + "step": 10640 + }, + { + "epoch": 5.53, + "learning_rate": 0.000978546512004327, + "loss": 1.9068, + "step": 10650 + }, + { + "epoch": 5.53, + "learning_rate": 0.0009784589376590175, + "loss": 1.7786, + "step": 10660 + }, + { + "epoch": 5.54, + "learning_rate": 0.0009783711888707485, + "loss": 1.7149, + "step": 10670 + }, + { + "epoch": 5.54, + "learning_rate": 0.0009782832656715123, + "loss": 1.7479, + "step": 10680 + }, + { + "epoch": 5.55, + "learning_rate": 0.0009781951680933657, + "loss": 1.7153, + "step": 10690 + }, + { + "epoch": 5.55, + "learning_rate": 0.0009781068961684282, + "loss": 1.8753, + "step": 10700 + }, + { + "epoch": 5.56, + "learning_rate": 0.000978018449928883, + "loss": 1.7972, + "step": 10710 + }, + { + "epoch": 5.56, + "learning_rate": 0.000977929829406977, + "loss": 1.6859, + "step": 10720 + }, + { + "epoch": 5.57, + "learning_rate": 0.0009778410346350209, + "loss": 1.8016, + "step": 10730 + }, + { + "epoch": 5.57, + "learning_rate": 0.0009777520656453886, + "loss": 1.6856, + "step": 10740 + }, + { + "epoch": 5.58, + "learning_rate": 0.0009776629224705175, + "loss": 1.7482, + "step": 10750 + }, + { + "epoch": 5.58, + "learning_rate": 0.000977573605142909, + "loss": 1.7562, + "step": 10760 + }, + { + "epoch": 5.59, + "learning_rate": 0.000977484113695127, + "loss": 1.7686, + "step": 10770 + }, + { + "epoch": 5.59, + "learning_rate": 0.0009773944481598, + "loss": 1.8105, + "step": 10780 + }, + { + "epoch": 5.6, + "learning_rate": 0.0009773046085696192, + "loss": 1.8326, + "step": 10790 + }, + { + "epoch": 5.6, + "learning_rate": 0.0009772145949573394, + "loss": 1.7717, + "step": 10800 + }, + { + "epoch": 5.61, + "learning_rate": 0.0009771244073557793, + "loss": 1.8094, + "step": 10810 + }, + { + "epoch": 5.61, + "learning_rate": 0.0009770340457978205, + "loss": 1.7605, + "step": 10820 + }, + { + "epoch": 5.62, + "learning_rate": 0.0009769435103164084, + "loss": 1.7474, + "step": 10830 + }, + { + "epoch": 5.63, + "learning_rate": 0.0009768528009445512, + "loss": 1.7731, + "step": 10840 + }, + { + "epoch": 5.63, + "learning_rate": 0.0009767619177153215, + "loss": 1.8068, + "step": 10850 + }, + { + "epoch": 5.64, + "learning_rate": 0.0009766708606618543, + "loss": 1.8979, + "step": 10860 + }, + { + "epoch": 5.64, + "learning_rate": 0.0009765796298173487, + "loss": 1.7847, + "step": 10870 + }, + { + "epoch": 5.65, + "learning_rate": 0.0009764882252150666, + "loss": 1.8401, + "step": 10880 + }, + { + "epoch": 5.65, + "learning_rate": 0.0009763966468883338, + "loss": 1.5723, + "step": 10890 + }, + { + "epoch": 5.66, + "learning_rate": 0.0009763048948705389, + "loss": 1.7514, + "step": 10900 + }, + { + "epoch": 5.66, + "learning_rate": 0.0009762129691951343, + "loss": 1.7905, + "step": 10910 + }, + { + "epoch": 5.67, + "learning_rate": 0.0009761208698956352, + "loss": 1.907, + "step": 10920 + }, + { + "epoch": 5.67, + "learning_rate": 0.0009760285970056205, + "loss": 1.8021, + "step": 10930 + }, + { + "epoch": 5.68, + "learning_rate": 0.0009759361505587324, + "loss": 1.6455, + "step": 10940 + }, + { + "epoch": 5.68, + "learning_rate": 0.0009758435305886763, + "loss": 1.8394, + "step": 10950 + }, + { + "epoch": 5.69, + "learning_rate": 0.0009757507371292207, + "loss": 1.7937, + "step": 10960 + }, + { + "epoch": 5.69, + "learning_rate": 0.0009756577702141973, + "loss": 1.7578, + "step": 10970 + }, + { + "epoch": 5.7, + "learning_rate": 0.0009755646298775016, + "loss": 1.7708, + "step": 10980 + }, + { + "epoch": 5.7, + "learning_rate": 0.0009754713161530919, + "loss": 1.8778, + "step": 10990 + }, + { + "epoch": 5.71, + "learning_rate": 0.0009753778290749895, + "loss": 1.843, + "step": 11000 + }, + { + "epoch": 5.71, + "learning_rate": 0.0009752841686772793, + "loss": 1.8345, + "step": 11010 + }, + { + "epoch": 5.72, + "learning_rate": 0.0009751903349941094, + "loss": 1.6698, + "step": 11020 + }, + { + "epoch": 5.72, + "learning_rate": 0.0009750963280596907, + "loss": 1.7422, + "step": 11030 + }, + { + "epoch": 5.73, + "learning_rate": 0.0009750021479082977, + "loss": 1.8949, + "step": 11040 + }, + { + "epoch": 5.73, + "learning_rate": 0.0009749077945742677, + "loss": 1.806, + "step": 11050 + }, + { + "epoch": 5.74, + "learning_rate": 0.0009748132680920014, + "loss": 1.7548, + "step": 11060 + }, + { + "epoch": 5.74, + "learning_rate": 0.0009747185684959625, + "loss": 1.795, + "step": 11070 + }, + { + "epoch": 5.75, + "learning_rate": 0.0009746236958206779, + "loss": 1.7781, + "step": 11080 + }, + { + "epoch": 5.76, + "learning_rate": 0.0009745286501007376, + "loss": 1.787, + "step": 11090 + }, + { + "epoch": 5.76, + "learning_rate": 0.0009744334313707944, + "loss": 1.8119, + "step": 11100 + }, + { + "epoch": 5.77, + "learning_rate": 0.0009743380396655646, + "loss": 1.7497, + "step": 11110 + }, + { + "epoch": 5.77, + "learning_rate": 0.0009742424750198273, + "loss": 1.7348, + "step": 11120 + }, + { + "epoch": 5.78, + "learning_rate": 0.0009741467374684247, + "loss": 1.7012, + "step": 11130 + }, + { + "epoch": 5.78, + "learning_rate": 0.0009740508270462625, + "loss": 1.816, + "step": 11140 + }, + { + "epoch": 5.79, + "learning_rate": 0.0009739547437883084, + "loss": 1.7649, + "step": 11150 + }, + { + "epoch": 5.79, + "learning_rate": 0.000973858487729594, + "loss": 1.8383, + "step": 11160 + }, + { + "epoch": 5.8, + "learning_rate": 0.0009737620589052136, + "loss": 1.7866, + "step": 11170 + }, + { + "epoch": 5.8, + "learning_rate": 0.0009736654573503247, + "loss": 1.7176, + "step": 11180 + }, + { + "epoch": 5.81, + "learning_rate": 0.0009735686831001473, + "loss": 1.7426, + "step": 11190 + }, + { + "epoch": 5.81, + "learning_rate": 0.0009734717361899646, + "loss": 1.7987, + "step": 11200 + }, + { + "epoch": 5.82, + "learning_rate": 0.0009733746166551232, + "loss": 1.7344, + "step": 11210 + }, + { + "epoch": 5.82, + "learning_rate": 0.0009732773245310318, + "loss": 1.809, + "step": 11220 + }, + { + "epoch": 5.83, + "learning_rate": 0.0009731798598531628, + "loss": 1.7074, + "step": 11230 + }, + { + "epoch": 5.83, + "learning_rate": 0.0009730822226570509, + "loss": 1.6944, + "step": 11240 + }, + { + "epoch": 5.84, + "learning_rate": 0.0009729844129782942, + "loss": 1.7243, + "step": 11250 + }, + { + "epoch": 5.84, + "learning_rate": 0.0009728864308525534, + "loss": 1.7516, + "step": 11260 + }, + { + "epoch": 5.85, + "learning_rate": 0.000972788276315552, + "loss": 1.7105, + "step": 11270 + }, + { + "epoch": 5.85, + "learning_rate": 0.0009726899494030768, + "loss": 1.6982, + "step": 11280 + }, + { + "epoch": 5.86, + "learning_rate": 0.0009725914501509767, + "loss": 1.7656, + "step": 11290 + }, + { + "epoch": 5.86, + "learning_rate": 0.0009724927785951643, + "loss": 1.8973, + "step": 11300 + }, + { + "epoch": 5.87, + "learning_rate": 0.0009723939347716143, + "loss": 1.9031, + "step": 11310 + }, + { + "epoch": 5.87, + "learning_rate": 0.0009722949187163646, + "loss": 1.8425, + "step": 11320 + }, + { + "epoch": 5.88, + "learning_rate": 0.0009721957304655159, + "loss": 1.8458, + "step": 11330 + }, + { + "epoch": 5.88, + "learning_rate": 0.0009720963700552315, + "loss": 1.7749, + "step": 11340 + }, + { + "epoch": 5.89, + "learning_rate": 0.0009719968375217374, + "loss": 1.9048, + "step": 11350 + }, + { + "epoch": 5.9, + "learning_rate": 0.0009718971329013229, + "loss": 1.7444, + "step": 11360 + }, + { + "epoch": 5.9, + "learning_rate": 0.0009717972562303391, + "loss": 1.6944, + "step": 11370 + }, + { + "epoch": 5.91, + "learning_rate": 0.0009716972075452009, + "loss": 1.7349, + "step": 11380 + }, + { + "epoch": 5.91, + "learning_rate": 0.0009715969868823849, + "loss": 1.8686, + "step": 11390 + }, + { + "epoch": 5.92, + "learning_rate": 0.0009714965942784313, + "loss": 1.6679, + "step": 11400 + }, + { + "epoch": 5.92, + "learning_rate": 0.0009713960297699424, + "loss": 1.7048, + "step": 11410 + }, + { + "epoch": 5.93, + "learning_rate": 0.0009712952933935832, + "loss": 1.8649, + "step": 11420 + }, + { + "epoch": 5.93, + "learning_rate": 0.0009711943851860819, + "loss": 1.7515, + "step": 11430 + }, + { + "epoch": 5.94, + "learning_rate": 0.0009710933051842288, + "loss": 1.9001, + "step": 11440 + }, + { + "epoch": 5.94, + "learning_rate": 0.0009709920534248769, + "loss": 1.8789, + "step": 11450 + }, + { + "epoch": 5.95, + "learning_rate": 0.0009708906299449418, + "loss": 1.9457, + "step": 11460 + }, + { + "epoch": 5.95, + "learning_rate": 0.0009707890347814022, + "loss": 1.8666, + "step": 11470 + }, + { + "epoch": 5.96, + "learning_rate": 0.0009706872679712986, + "loss": 1.7263, + "step": 11480 + }, + { + "epoch": 5.96, + "learning_rate": 0.0009705853295517348, + "loss": 1.8461, + "step": 11490 + }, + { + "epoch": 5.97, + "learning_rate": 0.0009704832195598769, + "loss": 1.7918, + "step": 11500 + }, + { + "epoch": 5.97, + "learning_rate": 0.0009703809380329534, + "loss": 1.8426, + "step": 11510 + }, + { + "epoch": 5.98, + "learning_rate": 0.0009702784850082553, + "loss": 1.7251, + "step": 11520 + }, + { + "epoch": 5.98, + "learning_rate": 0.0009701758605231368, + "loss": 1.6913, + "step": 11530 + }, + { + "epoch": 5.99, + "learning_rate": 0.0009700730646150135, + "loss": 1.5939, + "step": 11540 + }, + { + "epoch": 5.99, + "learning_rate": 0.0009699700973213645, + "loss": 1.8592, + "step": 11550 + }, + { + "epoch": 6.0, + "learning_rate": 0.0009698669586797311, + "loss": 1.7984, + "step": 11560 + }, + { + "epoch": 6.0, + "eval_accuracy": { + "accuracy": 0.3014267185473411 + }, + "eval_f1": { + "f1": 0.2074421859419775 + }, + "eval_loss": 1.7321857213974, + "eval_precision": { + "precision": 0.17325128260235972 + }, + "eval_recall": { + "recall": 0.31234162186318526 + }, + "eval_runtime": 103.9228, + "eval_samples_per_second": 37.095, + "eval_steps_per_second": 18.552, + "step": 11562 + }, + { + "epoch": 6.0, + "learning_rate": 0.0009697636487277166, + "loss": 1.7792, + "step": 11570 + }, + { + "epoch": 6.01, + "learning_rate": 0.0009696601675029873, + "loss": 1.6447, + "step": 11580 + }, + { + "epoch": 6.01, + "learning_rate": 0.0009695565150432717, + "loss": 1.8134, + "step": 11590 + }, + { + "epoch": 6.02, + "learning_rate": 0.0009694526913863608, + "loss": 1.7022, + "step": 11600 + }, + { + "epoch": 6.02, + "learning_rate": 0.0009693486965701082, + "loss": 1.8088, + "step": 11610 + }, + { + "epoch": 6.03, + "learning_rate": 0.0009692445306324296, + "loss": 1.852, + "step": 11620 + }, + { + "epoch": 6.04, + "learning_rate": 0.0009691401936113032, + "loss": 1.8435, + "step": 11630 + }, + { + "epoch": 6.04, + "learning_rate": 0.0009690356855447695, + "loss": 1.8285, + "step": 11640 + }, + { + "epoch": 6.05, + "learning_rate": 0.0009689310064709316, + "loss": 1.7503, + "step": 11650 + }, + { + "epoch": 6.05, + "learning_rate": 0.0009688261564279548, + "loss": 1.8343, + "step": 11660 + }, + { + "epoch": 6.06, + "learning_rate": 0.0009687211354540664, + "loss": 1.7397, + "step": 11670 + }, + { + "epoch": 6.06, + "learning_rate": 0.0009686159435875569, + "loss": 1.8581, + "step": 11680 + }, + { + "epoch": 6.07, + "learning_rate": 0.0009685105808667782, + "loss": 1.9103, + "step": 11690 + }, + { + "epoch": 6.07, + "learning_rate": 0.0009684050473301449, + "loss": 1.7767, + "step": 11700 + }, + { + "epoch": 6.08, + "learning_rate": 0.0009682993430161339, + "loss": 1.7691, + "step": 11710 + }, + { + "epoch": 6.08, + "learning_rate": 0.0009681934679632842, + "loss": 1.8959, + "step": 11720 + }, + { + "epoch": 6.09, + "learning_rate": 0.0009680874222101972, + "loss": 1.7943, + "step": 11730 + }, + { + "epoch": 6.09, + "learning_rate": 0.0009679812057955365, + "loss": 1.8252, + "step": 11740 + }, + { + "epoch": 6.1, + "learning_rate": 0.0009678748187580278, + "loss": 1.8658, + "step": 11750 + }, + { + "epoch": 6.1, + "learning_rate": 0.0009677682611364593, + "loss": 1.9736, + "step": 11760 + }, + { + "epoch": 6.11, + "learning_rate": 0.000967661532969681, + "loss": 1.7876, + "step": 11770 + }, + { + "epoch": 6.11, + "learning_rate": 0.0009675546342966054, + "loss": 1.7609, + "step": 11780 + }, + { + "epoch": 6.12, + "learning_rate": 0.0009674475651562071, + "loss": 1.8479, + "step": 11790 + }, + { + "epoch": 6.12, + "learning_rate": 0.0009673403255875225, + "loss": 1.756, + "step": 11800 + }, + { + "epoch": 6.13, + "learning_rate": 0.0009672329156296508, + "loss": 1.7295, + "step": 11810 + }, + { + "epoch": 6.13, + "learning_rate": 0.0009671253353217528, + "loss": 2.0492, + "step": 11820 + }, + { + "epoch": 6.14, + "learning_rate": 0.0009670175847030515, + "loss": 1.7373, + "step": 11830 + }, + { + "epoch": 6.14, + "learning_rate": 0.0009669096638128322, + "loss": 1.9517, + "step": 11840 + }, + { + "epoch": 6.15, + "learning_rate": 0.000966801572690442, + "loss": 1.7751, + "step": 11850 + }, + { + "epoch": 6.15, + "learning_rate": 0.0009666933113752905, + "loss": 1.8541, + "step": 11860 + }, + { + "epoch": 6.16, + "learning_rate": 0.0009665848799068486, + "loss": 1.8422, + "step": 11870 + }, + { + "epoch": 6.17, + "learning_rate": 0.0009664762783246501, + "loss": 1.7967, + "step": 11880 + }, + { + "epoch": 6.17, + "learning_rate": 0.0009663675066682902, + "loss": 1.761, + "step": 11890 + }, + { + "epoch": 6.18, + "learning_rate": 0.0009662585649774264, + "loss": 1.7239, + "step": 11900 + }, + { + "epoch": 6.18, + "learning_rate": 0.000966149453291778, + "loss": 1.8602, + "step": 11910 + }, + { + "epoch": 6.19, + "learning_rate": 0.0009660401716511266, + "loss": 1.7893, + "step": 11920 + }, + { + "epoch": 6.19, + "learning_rate": 0.0009659307200953156, + "loss": 1.7777, + "step": 11930 + }, + { + "epoch": 6.2, + "learning_rate": 0.0009658210986642499, + "loss": 1.8514, + "step": 11940 + }, + { + "epoch": 6.2, + "learning_rate": 0.0009657113073978972, + "loss": 1.7697, + "step": 11950 + }, + { + "epoch": 6.21, + "learning_rate": 0.0009656013463362865, + "loss": 1.7849, + "step": 11960 + }, + { + "epoch": 6.21, + "learning_rate": 0.0009654912155195088, + "loss": 1.7234, + "step": 11970 + }, + { + "epoch": 6.22, + "learning_rate": 0.0009653809149877172, + "loss": 1.7891, + "step": 11980 + }, + { + "epoch": 6.22, + "learning_rate": 0.0009652704447811265, + "loss": 1.6848, + "step": 11990 + }, + { + "epoch": 6.23, + "learning_rate": 0.0009651598049400132, + "loss": 1.8787, + "step": 12000 + }, + { + "epoch": 6.23, + "learning_rate": 0.0009650489955047161, + "loss": 1.8163, + "step": 12010 + }, + { + "epoch": 6.24, + "learning_rate": 0.0009649380165156356, + "loss": 1.6971, + "step": 12020 + }, + { + "epoch": 6.24, + "learning_rate": 0.0009648268680132338, + "loss": 1.7703, + "step": 12030 + }, + { + "epoch": 6.25, + "learning_rate": 0.0009647155500380348, + "loss": 1.8627, + "step": 12040 + }, + { + "epoch": 6.25, + "learning_rate": 0.0009646040626306243, + "loss": 1.831, + "step": 12050 + }, + { + "epoch": 6.26, + "learning_rate": 0.0009644924058316498, + "loss": 1.6914, + "step": 12060 + }, + { + "epoch": 6.26, + "learning_rate": 0.0009643805796818208, + "loss": 1.8321, + "step": 12070 + }, + { + "epoch": 6.27, + "learning_rate": 0.0009642685842219084, + "loss": 1.7376, + "step": 12080 + }, + { + "epoch": 6.27, + "learning_rate": 0.0009641564194927451, + "loss": 1.7676, + "step": 12090 + }, + { + "epoch": 6.28, + "learning_rate": 0.0009640440855352257, + "loss": 1.88, + "step": 12100 + }, + { + "epoch": 6.28, + "learning_rate": 0.0009639315823903065, + "loss": 1.7956, + "step": 12110 + }, + { + "epoch": 6.29, + "learning_rate": 0.000963818910099005, + "loss": 1.8877, + "step": 12120 + }, + { + "epoch": 6.29, + "learning_rate": 0.0009637060687024012, + "loss": 1.8099, + "step": 12130 + }, + { + "epoch": 6.3, + "learning_rate": 0.000963593058241636, + "loss": 1.7973, + "step": 12140 + }, + { + "epoch": 6.31, + "learning_rate": 0.0009634798787579124, + "loss": 1.7283, + "step": 12150 + }, + { + "epoch": 6.31, + "learning_rate": 0.000963366530292495, + "loss": 1.8744, + "step": 12160 + }, + { + "epoch": 6.32, + "learning_rate": 0.0009632530128867099, + "loss": 1.7395, + "step": 12170 + }, + { + "epoch": 6.32, + "learning_rate": 0.0009631393265819444, + "loss": 1.7538, + "step": 12180 + }, + { + "epoch": 6.33, + "learning_rate": 0.0009630254714196483, + "loss": 1.8393, + "step": 12190 + }, + { + "epoch": 6.33, + "learning_rate": 0.000962911447441332, + "loss": 1.7348, + "step": 12200 + }, + { + "epoch": 6.34, + "learning_rate": 0.0009627972546885682, + "loss": 1.8174, + "step": 12210 + }, + { + "epoch": 6.34, + "learning_rate": 0.0009626828932029907, + "loss": 1.7659, + "step": 12220 + }, + { + "epoch": 6.35, + "learning_rate": 0.0009625683630262951, + "loss": 1.7505, + "step": 12230 + }, + { + "epoch": 6.35, + "learning_rate": 0.0009624536642002379, + "loss": 1.7804, + "step": 12240 + }, + { + "epoch": 6.36, + "learning_rate": 0.0009623387967666382, + "loss": 1.71, + "step": 12250 + }, + { + "epoch": 6.36, + "learning_rate": 0.0009622237607673752, + "loss": 1.6895, + "step": 12260 + }, + { + "epoch": 6.37, + "learning_rate": 0.0009621085562443908, + "loss": 1.8456, + "step": 12270 + }, + { + "epoch": 6.37, + "learning_rate": 0.0009619931832396873, + "loss": 1.8019, + "step": 12280 + }, + { + "epoch": 6.38, + "learning_rate": 0.0009618776417953294, + "loss": 1.6919, + "step": 12290 + }, + { + "epoch": 6.38, + "learning_rate": 0.0009617619319534428, + "loss": 1.7653, + "step": 12300 + }, + { + "epoch": 6.39, + "learning_rate": 0.000961646053756214, + "loss": 1.7869, + "step": 12310 + }, + { + "epoch": 6.39, + "learning_rate": 0.0009615300072458919, + "loss": 1.8052, + "step": 12320 + }, + { + "epoch": 6.4, + "learning_rate": 0.0009614137924647861, + "loss": 1.7572, + "step": 12330 + }, + { + "epoch": 6.4, + "learning_rate": 0.0009612974094552678, + "loss": 1.6216, + "step": 12340 + }, + { + "epoch": 6.41, + "learning_rate": 0.0009611808582597694, + "loss": 1.8323, + "step": 12350 + }, + { + "epoch": 6.41, + "learning_rate": 0.0009610641389207848, + "loss": 1.8704, + "step": 12360 + }, + { + "epoch": 6.42, + "learning_rate": 0.000960947251480869, + "loss": 1.8894, + "step": 12370 + }, + { + "epoch": 6.42, + "learning_rate": 0.0009608301959826384, + "loss": 1.8785, + "step": 12380 + }, + { + "epoch": 6.43, + "learning_rate": 0.0009607129724687708, + "loss": 1.8482, + "step": 12390 + }, + { + "epoch": 6.43, + "learning_rate": 0.0009605955809820049, + "loss": 1.904, + "step": 12400 + }, + { + "epoch": 6.44, + "learning_rate": 0.000960478021565141, + "loss": 1.8867, + "step": 12410 + }, + { + "epoch": 6.45, + "learning_rate": 0.0009603602942610403, + "loss": 1.907, + "step": 12420 + }, + { + "epoch": 6.45, + "learning_rate": 0.0009602423991126258, + "loss": 1.9181, + "step": 12430 + }, + { + "epoch": 6.46, + "learning_rate": 0.0009601243361628809, + "loss": 1.8593, + "step": 12440 + }, + { + "epoch": 6.46, + "learning_rate": 0.0009600061054548506, + "loss": 1.7883, + "step": 12450 + }, + { + "epoch": 6.47, + "learning_rate": 0.0009598877070316412, + "loss": 1.7691, + "step": 12460 + }, + { + "epoch": 6.47, + "learning_rate": 0.0009597691409364199, + "loss": 1.8437, + "step": 12470 + }, + { + "epoch": 6.48, + "learning_rate": 0.0009596504072124153, + "loss": 1.8041, + "step": 12480 + }, + { + "epoch": 6.48, + "learning_rate": 0.0009595315059029166, + "loss": 1.8488, + "step": 12490 + }, + { + "epoch": 6.49, + "learning_rate": 0.0009594124370512746, + "loss": 1.8085, + "step": 12500 + }, + { + "epoch": 6.49, + "learning_rate": 0.0009592932007009009, + "loss": 1.7688, + "step": 12510 + }, + { + "epoch": 6.5, + "learning_rate": 0.0009591737968952685, + "loss": 1.8884, + "step": 12520 + }, + { + "epoch": 6.5, + "learning_rate": 0.0009590542256779111, + "loss": 1.9053, + "step": 12530 + }, + { + "epoch": 6.51, + "learning_rate": 0.0009589344870924237, + "loss": 1.9115, + "step": 12540 + }, + { + "epoch": 6.51, + "learning_rate": 0.000958814581182462, + "loss": 1.8114, + "step": 12550 + }, + { + "epoch": 6.52, + "learning_rate": 0.0009586945079917429, + "loss": 1.8891, + "step": 12560 + }, + { + "epoch": 6.52, + "learning_rate": 0.0009585742675640445, + "loss": 1.8384, + "step": 12570 + }, + { + "epoch": 6.53, + "learning_rate": 0.0009584538599432056, + "loss": 1.859, + "step": 12580 + }, + { + "epoch": 6.53, + "learning_rate": 0.0009583332851731258, + "loss": 1.7339, + "step": 12590 + }, + { + "epoch": 6.54, + "learning_rate": 0.0009582125432977661, + "loss": 1.8824, + "step": 12600 + }, + { + "epoch": 6.54, + "learning_rate": 0.0009580916343611481, + "loss": 1.751, + "step": 12610 + }, + { + "epoch": 6.55, + "learning_rate": 0.0009579705584073544, + "loss": 1.6991, + "step": 12620 + }, + { + "epoch": 6.55, + "learning_rate": 0.0009578493154805285, + "loss": 1.7719, + "step": 12630 + }, + { + "epoch": 6.56, + "learning_rate": 0.0009577279056248749, + "loss": 1.8559, + "step": 12640 + }, + { + "epoch": 6.56, + "learning_rate": 0.0009576063288846585, + "loss": 1.7681, + "step": 12650 + }, + { + "epoch": 6.57, + "learning_rate": 0.0009574845853042057, + "loss": 1.7717, + "step": 12660 + }, + { + "epoch": 6.57, + "learning_rate": 0.0009573626749279032, + "loss": 1.8828, + "step": 12670 + }, + { + "epoch": 6.58, + "learning_rate": 0.0009572405978001988, + "loss": 1.7839, + "step": 12680 + }, + { + "epoch": 6.59, + "learning_rate": 0.000957118353965601, + "loss": 1.714, + "step": 12690 + }, + { + "epoch": 6.59, + "learning_rate": 0.0009569959434686792, + "loss": 1.8501, + "step": 12700 + }, + { + "epoch": 6.6, + "learning_rate": 0.0009568733663540634, + "loss": 1.7382, + "step": 12710 + }, + { + "epoch": 6.6, + "learning_rate": 0.0009567506226664441, + "loss": 1.8327, + "step": 12720 + }, + { + "epoch": 6.61, + "learning_rate": 0.0009566277124505733, + "loss": 1.8203, + "step": 12730 + }, + { + "epoch": 6.61, + "learning_rate": 0.000956504635751263, + "loss": 1.7329, + "step": 12740 + }, + { + "epoch": 6.62, + "learning_rate": 0.0009563813926133862, + "loss": 1.7699, + "step": 12750 + }, + { + "epoch": 6.62, + "learning_rate": 0.0009562579830818765, + "loss": 1.8813, + "step": 12760 + }, + { + "epoch": 6.63, + "learning_rate": 0.0009561344072017283, + "loss": 1.9075, + "step": 12770 + }, + { + "epoch": 6.63, + "learning_rate": 0.0009560106650179964, + "loss": 1.7317, + "step": 12780 + }, + { + "epoch": 6.64, + "learning_rate": 0.0009558867565757965, + "loss": 1.6895, + "step": 12790 + }, + { + "epoch": 6.64, + "learning_rate": 0.0009557626819203047, + "loss": 1.6617, + "step": 12800 + }, + { + "epoch": 6.65, + "learning_rate": 0.0009556384410967578, + "loss": 1.7724, + "step": 12810 + }, + { + "epoch": 6.65, + "learning_rate": 0.0009555140341504534, + "loss": 1.8285, + "step": 12820 + }, + { + "epoch": 6.66, + "learning_rate": 0.000955389461126749, + "loss": 1.8063, + "step": 12830 + }, + { + "epoch": 6.66, + "learning_rate": 0.0009552647220710636, + "loss": 1.725, + "step": 12840 + }, + { + "epoch": 6.67, + "learning_rate": 0.000955139817028876, + "loss": 1.6705, + "step": 12850 + }, + { + "epoch": 6.67, + "learning_rate": 0.0009550147460457256, + "loss": 1.7254, + "step": 12860 + }, + { + "epoch": 6.68, + "learning_rate": 0.0009548895091672127, + "loss": 1.8107, + "step": 12870 + }, + { + "epoch": 6.68, + "learning_rate": 0.0009547641064389978, + "loss": 1.6768, + "step": 12880 + }, + { + "epoch": 6.69, + "learning_rate": 0.0009546385379068018, + "loss": 1.7145, + "step": 12890 + }, + { + "epoch": 6.69, + "learning_rate": 0.0009545128036164064, + "loss": 1.7159, + "step": 12900 + }, + { + "epoch": 6.7, + "learning_rate": 0.000954386903613653, + "loss": 1.7503, + "step": 12910 + }, + { + "epoch": 6.7, + "learning_rate": 0.0009542608379444444, + "loss": 1.7583, + "step": 12920 + }, + { + "epoch": 6.71, + "learning_rate": 0.000954134606654743, + "loss": 1.8885, + "step": 12930 + }, + { + "epoch": 6.72, + "learning_rate": 0.000954008209790572, + "loss": 1.6543, + "step": 12940 + }, + { + "epoch": 6.72, + "learning_rate": 0.0009538816473980149, + "loss": 1.8198, + "step": 12950 + }, + { + "epoch": 6.73, + "learning_rate": 0.0009537549195232156, + "loss": 1.7566, + "step": 12960 + }, + { + "epoch": 6.73, + "learning_rate": 0.000953628026212378, + "loss": 1.7466, + "step": 12970 + }, + { + "epoch": 6.74, + "learning_rate": 0.0009535009675117665, + "loss": 1.6681, + "step": 12980 + }, + { + "epoch": 6.74, + "learning_rate": 0.0009533737434677063, + "loss": 1.8596, + "step": 12990 + }, + { + "epoch": 6.75, + "learning_rate": 0.0009532463541265822, + "loss": 1.8017, + "step": 13000 + }, + { + "epoch": 6.75, + "learning_rate": 0.0009531187995348394, + "loss": 1.7382, + "step": 13010 + }, + { + "epoch": 6.76, + "learning_rate": 0.0009529910797389835, + "loss": 1.7205, + "step": 13020 + }, + { + "epoch": 6.76, + "learning_rate": 0.0009528631947855803, + "loss": 1.9236, + "step": 13030 + }, + { + "epoch": 6.77, + "learning_rate": 0.000952735144721256, + "loss": 1.596, + "step": 13040 + }, + { + "epoch": 6.77, + "learning_rate": 0.0009526069295926966, + "loss": 1.8309, + "step": 13050 + }, + { + "epoch": 6.78, + "learning_rate": 0.0009524785494466485, + "loss": 1.7308, + "step": 13060 + }, + { + "epoch": 6.78, + "learning_rate": 0.0009523500043299184, + "loss": 1.7496, + "step": 13070 + }, + { + "epoch": 6.79, + "learning_rate": 0.0009522212942893728, + "loss": 1.639, + "step": 13080 + }, + { + "epoch": 6.79, + "learning_rate": 0.0009520924193719387, + "loss": 1.8481, + "step": 13090 + }, + { + "epoch": 6.8, + "learning_rate": 0.0009519633796246028, + "loss": 1.5754, + "step": 13100 + }, + { + "epoch": 6.8, + "learning_rate": 0.0009518341750944124, + "loss": 1.91, + "step": 13110 + }, + { + "epoch": 6.81, + "learning_rate": 0.0009517048058284745, + "loss": 1.7873, + "step": 13120 + }, + { + "epoch": 6.81, + "learning_rate": 0.0009515752718739564, + "loss": 1.9317, + "step": 13130 + }, + { + "epoch": 6.82, + "learning_rate": 0.0009514455732780851, + "loss": 1.8516, + "step": 13140 + }, + { + "epoch": 6.82, + "learning_rate": 0.000951315710088148, + "loss": 1.8624, + "step": 13150 + }, + { + "epoch": 6.83, + "learning_rate": 0.0009511856823514923, + "loss": 1.7967, + "step": 13160 + }, + { + "epoch": 6.83, + "learning_rate": 0.0009510554901155252, + "loss": 1.6538, + "step": 13170 + }, + { + "epoch": 6.84, + "learning_rate": 0.0009509251334277141, + "loss": 1.7719, + "step": 13180 + }, + { + "epoch": 6.84, + "learning_rate": 0.000950794612335586, + "loss": 1.7984, + "step": 13190 + }, + { + "epoch": 6.85, + "learning_rate": 0.0009506639268867283, + "loss": 1.9263, + "step": 13200 + }, + { + "epoch": 6.86, + "learning_rate": 0.0009505330771287877, + "loss": 1.838, + "step": 13210 + }, + { + "epoch": 6.86, + "learning_rate": 0.0009504020631094715, + "loss": 1.7548, + "step": 13220 + }, + { + "epoch": 6.87, + "learning_rate": 0.000950270884876546, + "loss": 1.8724, + "step": 13230 + }, + { + "epoch": 6.87, + "learning_rate": 0.0009501395424778385, + "loss": 1.7411, + "step": 13240 + }, + { + "epoch": 6.88, + "learning_rate": 0.0009500080359612353, + "loss": 1.7446, + "step": 13250 + }, + { + "epoch": 6.88, + "learning_rate": 0.0009498763653746829, + "loss": 1.7895, + "step": 13260 + }, + { + "epoch": 6.89, + "learning_rate": 0.0009497445307661875, + "loss": 1.6295, + "step": 13270 + }, + { + "epoch": 6.89, + "learning_rate": 0.000949612532183815, + "loss": 1.6633, + "step": 13280 + }, + { + "epoch": 6.9, + "learning_rate": 0.0009494803696756913, + "loss": 2.1007, + "step": 13290 + }, + { + "epoch": 6.9, + "learning_rate": 0.0009493480432900021, + "loss": 1.7611, + "step": 13300 + }, + { + "epoch": 6.91, + "learning_rate": 0.0009492155530749925, + "loss": 1.847, + "step": 13310 + }, + { + "epoch": 6.91, + "learning_rate": 0.0009490828990789677, + "loss": 1.7704, + "step": 13320 + }, + { + "epoch": 6.92, + "learning_rate": 0.0009489500813502923, + "loss": 1.7631, + "step": 13330 + }, + { + "epoch": 6.92, + "learning_rate": 0.0009488170999373911, + "loss": 1.8335, + "step": 13340 + }, + { + "epoch": 6.93, + "learning_rate": 0.0009486839548887479, + "loss": 1.852, + "step": 13350 + }, + { + "epoch": 6.93, + "learning_rate": 0.0009485506462529067, + "loss": 1.609, + "step": 13360 + }, + { + "epoch": 6.94, + "learning_rate": 0.0009484171740784708, + "loss": 1.7785, + "step": 13370 + }, + { + "epoch": 6.94, + "learning_rate": 0.0009482835384141034, + "loss": 1.7499, + "step": 13380 + }, + { + "epoch": 6.95, + "learning_rate": 0.0009481497393085272, + "loss": 1.8266, + "step": 13390 + }, + { + "epoch": 6.95, + "learning_rate": 0.0009480157768105242, + "loss": 1.7917, + "step": 13400 + }, + { + "epoch": 6.96, + "learning_rate": 0.0009478816509689367, + "loss": 1.753, + "step": 13410 + }, + { + "epoch": 6.96, + "learning_rate": 0.0009477473618326656, + "loss": 1.7479, + "step": 13420 + }, + { + "epoch": 6.97, + "learning_rate": 0.0009476129094506722, + "loss": 1.7644, + "step": 13430 + }, + { + "epoch": 6.97, + "learning_rate": 0.0009474782938719769, + "loss": 1.6127, + "step": 13440 + }, + { + "epoch": 6.98, + "learning_rate": 0.0009473435151456593, + "loss": 1.817, + "step": 13450 + }, + { + "epoch": 6.98, + "learning_rate": 0.0009472085733208592, + "loss": 1.6814, + "step": 13460 + }, + { + "epoch": 6.99, + "learning_rate": 0.0009470734684467754, + "loss": 1.6956, + "step": 13470 + }, + { + "epoch": 7.0, + "learning_rate": 0.0009469382005726661, + "loss": 1.7385, + "step": 13480 + }, + { + "epoch": 7.0, + "eval_accuracy": { + "accuracy": 0.3120622568093385 + }, + "eval_f1": { + "f1": 0.2220020856519651 + }, + "eval_loss": 1.6964426040649414, + "eval_precision": { + "precision": 0.24821744277779434 + }, + "eval_recall": { + "recall": 0.3239827067963708 + }, + "eval_runtime": 102.2041, + "eval_samples_per_second": 37.719, + "eval_steps_per_second": 18.864, + "step": 13489 + }, + { + "epoch": 7.0, + "learning_rate": 0.0009468027697478494, + "loss": 1.867, + "step": 13490 + }, + { + "epoch": 7.01, + "learning_rate": 0.0009466671760217023, + "loss": 1.7677, + "step": 13500 + }, + { + "epoch": 7.01, + "learning_rate": 0.0009465314194436611, + "loss": 1.8369, + "step": 13510 + }, + { + "epoch": 7.02, + "learning_rate": 0.0009463955000632222, + "loss": 1.8289, + "step": 13520 + }, + { + "epoch": 7.02, + "learning_rate": 0.0009462594179299406, + "loss": 1.819, + "step": 13530 + }, + { + "epoch": 7.03, + "learning_rate": 0.0009461231730934312, + "loss": 1.8158, + "step": 13540 + }, + { + "epoch": 7.03, + "learning_rate": 0.0009459867656033676, + "loss": 1.7489, + "step": 13550 + }, + { + "epoch": 7.04, + "learning_rate": 0.0009458501955094835, + "loss": 1.7089, + "step": 13560 + }, + { + "epoch": 7.04, + "learning_rate": 0.0009457134628615712, + "loss": 1.9333, + "step": 13570 + }, + { + "epoch": 7.05, + "learning_rate": 0.0009455765677094825, + "loss": 1.8292, + "step": 13580 + }, + { + "epoch": 7.05, + "learning_rate": 0.0009454395101031283, + "loss": 1.8648, + "step": 13590 + }, + { + "epoch": 7.06, + "learning_rate": 0.0009453022900924795, + "loss": 1.7783, + "step": 13600 + }, + { + "epoch": 7.06, + "learning_rate": 0.0009451649077275651, + "loss": 1.7176, + "step": 13610 + }, + { + "epoch": 7.07, + "learning_rate": 0.0009450273630584738, + "loss": 1.6828, + "step": 13620 + }, + { + "epoch": 7.07, + "learning_rate": 0.0009448896561353536, + "loss": 1.8494, + "step": 13630 + }, + { + "epoch": 7.08, + "learning_rate": 0.0009447517870084116, + "loss": 1.7528, + "step": 13640 + }, + { + "epoch": 7.08, + "learning_rate": 0.000944613755727914, + "loss": 1.7617, + "step": 13650 + }, + { + "epoch": 7.09, + "learning_rate": 0.0009444755623441858, + "loss": 1.8195, + "step": 13660 + }, + { + "epoch": 7.09, + "learning_rate": 0.0009443372069076118, + "loss": 1.8131, + "step": 13670 + }, + { + "epoch": 7.1, + "learning_rate": 0.000944198689468635, + "loss": 1.6968, + "step": 13680 + }, + { + "epoch": 7.1, + "learning_rate": 0.0009440600100777583, + "loss": 1.7565, + "step": 13690 + }, + { + "epoch": 7.11, + "learning_rate": 0.0009439211687855433, + "loss": 1.7608, + "step": 13700 + }, + { + "epoch": 7.11, + "learning_rate": 0.0009437821656426104, + "loss": 1.7052, + "step": 13710 + }, + { + "epoch": 7.12, + "learning_rate": 0.0009436430006996392, + "loss": 1.8568, + "step": 13720 + }, + { + "epoch": 7.13, + "learning_rate": 0.0009435036740073687, + "loss": 1.9427, + "step": 13730 + }, + { + "epoch": 7.13, + "learning_rate": 0.0009433641856165961, + "loss": 1.7566, + "step": 13740 + }, + { + "epoch": 7.14, + "learning_rate": 0.0009432245355781782, + "loss": 1.7976, + "step": 13750 + }, + { + "epoch": 7.14, + "learning_rate": 0.0009430847239430302, + "loss": 1.7797, + "step": 13760 + }, + { + "epoch": 7.15, + "learning_rate": 0.0009429447507621268, + "loss": 1.6913, + "step": 13770 + }, + { + "epoch": 7.15, + "learning_rate": 0.0009428046160865009, + "loss": 1.7375, + "step": 13780 + }, + { + "epoch": 7.16, + "learning_rate": 0.0009426643199672451, + "loss": 1.7487, + "step": 13790 + }, + { + "epoch": 7.16, + "learning_rate": 0.0009425238624555101, + "loss": 1.7797, + "step": 13800 + }, + { + "epoch": 7.17, + "learning_rate": 0.0009423832436025062, + "loss": 1.9005, + "step": 13810 + }, + { + "epoch": 7.17, + "learning_rate": 0.0009422424634595017, + "loss": 1.7843, + "step": 13820 + }, + { + "epoch": 7.18, + "learning_rate": 0.0009421015220778242, + "loss": 1.7961, + "step": 13830 + }, + { + "epoch": 7.18, + "learning_rate": 0.00094196041950886, + "loss": 1.741, + "step": 13840 + }, + { + "epoch": 7.19, + "learning_rate": 0.0009418191558040543, + "loss": 1.724, + "step": 13850 + }, + { + "epoch": 7.19, + "learning_rate": 0.000941677731014911, + "loss": 1.7876, + "step": 13860 + }, + { + "epoch": 7.2, + "learning_rate": 0.0009415361451929922, + "loss": 1.7415, + "step": 13870 + }, + { + "epoch": 7.2, + "learning_rate": 0.0009413943983899194, + "loss": 1.6685, + "step": 13880 + }, + { + "epoch": 7.21, + "learning_rate": 0.0009412524906573726, + "loss": 1.6868, + "step": 13890 + }, + { + "epoch": 7.21, + "learning_rate": 0.0009411104220470903, + "loss": 1.8981, + "step": 13900 + }, + { + "epoch": 7.22, + "learning_rate": 0.00094096819261087, + "loss": 1.7498, + "step": 13910 + }, + { + "epoch": 7.22, + "learning_rate": 0.0009408258024005674, + "loss": 1.8692, + "step": 13920 + }, + { + "epoch": 7.23, + "learning_rate": 0.0009406832514680971, + "loss": 1.8906, + "step": 13930 + }, + { + "epoch": 7.23, + "learning_rate": 0.0009405405398654321, + "loss": 1.7531, + "step": 13940 + }, + { + "epoch": 7.24, + "learning_rate": 0.0009403976676446044, + "loss": 1.7559, + "step": 13950 + }, + { + "epoch": 7.24, + "learning_rate": 0.0009402546348577039, + "loss": 1.7564, + "step": 13960 + }, + { + "epoch": 7.25, + "learning_rate": 0.0009401114415568797, + "loss": 1.8532, + "step": 13970 + }, + { + "epoch": 7.25, + "learning_rate": 0.0009399680877943389, + "loss": 1.8019, + "step": 13980 + }, + { + "epoch": 7.26, + "learning_rate": 0.0009398245736223476, + "loss": 1.8442, + "step": 13990 + }, + { + "epoch": 7.27, + "learning_rate": 0.0009396808990932298, + "loss": 1.8377, + "step": 14000 + }, + { + "epoch": 7.27, + "learning_rate": 0.0009395370642593686, + "loss": 1.8058, + "step": 14010 + }, + { + "epoch": 7.28, + "learning_rate": 0.000939393069173205, + "loss": 1.6893, + "step": 14020 + }, + { + "epoch": 7.28, + "learning_rate": 0.0009392489138872386, + "loss": 1.7234, + "step": 14030 + }, + { + "epoch": 7.29, + "learning_rate": 0.0009391045984540278, + "loss": 1.7425, + "step": 14040 + }, + { + "epoch": 7.29, + "learning_rate": 0.0009389601229261888, + "loss": 1.7706, + "step": 14050 + }, + { + "epoch": 7.3, + "learning_rate": 0.0009388154873563964, + "loss": 1.8947, + "step": 14060 + }, + { + "epoch": 7.3, + "learning_rate": 0.000938670691797384, + "loss": 1.8376, + "step": 14070 + }, + { + "epoch": 7.31, + "learning_rate": 0.0009385257363019428, + "loss": 1.6913, + "step": 14080 + }, + { + "epoch": 7.31, + "learning_rate": 0.0009383806209229226, + "loss": 1.7429, + "step": 14090 + }, + { + "epoch": 7.32, + "learning_rate": 0.0009382353457132319, + "loss": 1.7054, + "step": 14100 + }, + { + "epoch": 7.32, + "learning_rate": 0.0009380899107258367, + "loss": 1.7288, + "step": 14110 + }, + { + "epoch": 7.33, + "learning_rate": 0.0009379443160137617, + "loss": 1.7682, + "step": 14120 + }, + { + "epoch": 7.33, + "learning_rate": 0.00093779856163009, + "loss": 1.7532, + "step": 14130 + }, + { + "epoch": 7.34, + "learning_rate": 0.0009376526476279625, + "loss": 1.6605, + "step": 14140 + }, + { + "epoch": 7.34, + "learning_rate": 0.0009375065740605785, + "loss": 1.8331, + "step": 14150 + }, + { + "epoch": 7.35, + "learning_rate": 0.0009373603409811954, + "loss": 1.7324, + "step": 14160 + }, + { + "epoch": 7.35, + "learning_rate": 0.0009372139484431288, + "loss": 1.7655, + "step": 14170 + }, + { + "epoch": 7.36, + "learning_rate": 0.0009370673964997527, + "loss": 1.8016, + "step": 14180 + }, + { + "epoch": 7.36, + "learning_rate": 0.0009369206852044985, + "loss": 1.7753, + "step": 14190 + }, + { + "epoch": 7.37, + "learning_rate": 0.0009367738146108566, + "loss": 1.6695, + "step": 14200 + }, + { + "epoch": 7.37, + "learning_rate": 0.0009366267847723751, + "loss": 1.7622, + "step": 14210 + }, + { + "epoch": 7.38, + "learning_rate": 0.0009364795957426596, + "loss": 1.6401, + "step": 14220 + }, + { + "epoch": 7.38, + "learning_rate": 0.0009363322475753746, + "loss": 1.7551, + "step": 14230 + }, + { + "epoch": 7.39, + "learning_rate": 0.0009361847403242423, + "loss": 1.78, + "step": 14240 + }, + { + "epoch": 7.39, + "learning_rate": 0.0009360370740430428, + "loss": 1.7698, + "step": 14250 + }, + { + "epoch": 7.4, + "learning_rate": 0.0009358892487856142, + "loss": 1.7255, + "step": 14260 + }, + { + "epoch": 7.41, + "learning_rate": 0.0009357412646058526, + "loss": 1.7899, + "step": 14270 + }, + { + "epoch": 7.41, + "learning_rate": 0.000935593121557712, + "loss": 1.717, + "step": 14280 + }, + { + "epoch": 7.42, + "learning_rate": 0.0009354448196952045, + "loss": 1.7246, + "step": 14290 + }, + { + "epoch": 7.42, + "learning_rate": 0.0009352963590723999, + "loss": 1.7834, + "step": 14300 + }, + { + "epoch": 7.43, + "learning_rate": 0.0009351477397434261, + "loss": 1.9131, + "step": 14310 + }, + { + "epoch": 7.43, + "learning_rate": 0.0009349989617624685, + "loss": 1.6523, + "step": 14320 + }, + { + "epoch": 7.44, + "learning_rate": 0.000934850025183771, + "loss": 1.771, + "step": 14330 + }, + { + "epoch": 7.44, + "learning_rate": 0.0009347009300616343, + "loss": 1.6129, + "step": 14340 + }, + { + "epoch": 7.45, + "learning_rate": 0.0009345516764504179, + "loss": 1.7177, + "step": 14350 + }, + { + "epoch": 7.45, + "learning_rate": 0.0009344022644045384, + "loss": 1.7367, + "step": 14360 + }, + { + "epoch": 7.46, + "learning_rate": 0.0009342526939784706, + "loss": 1.6732, + "step": 14370 + }, + { + "epoch": 7.46, + "learning_rate": 0.000934102965226747, + "loss": 1.6573, + "step": 14380 + }, + { + "epoch": 7.47, + "learning_rate": 0.0009339530782039575, + "loss": 1.7023, + "step": 14390 + }, + { + "epoch": 7.47, + "learning_rate": 0.00093380303296475, + "loss": 1.7721, + "step": 14400 + }, + { + "epoch": 7.48, + "learning_rate": 0.00093365282956383, + "loss": 1.8415, + "step": 14410 + }, + { + "epoch": 7.48, + "learning_rate": 0.0009335024680559608, + "loss": 1.7607, + "step": 14420 + }, + { + "epoch": 7.49, + "learning_rate": 0.000933351948495963, + "loss": 1.8485, + "step": 14430 + }, + { + "epoch": 7.49, + "learning_rate": 0.000933201270938715, + "loss": 1.7508, + "step": 14440 + }, + { + "epoch": 7.5, + "learning_rate": 0.0009330504354391531, + "loss": 1.8016, + "step": 14450 + }, + { + "epoch": 7.5, + "learning_rate": 0.0009328994420522707, + "loss": 1.7386, + "step": 14460 + }, + { + "epoch": 7.51, + "learning_rate": 0.000932748290833119, + "loss": 1.8136, + "step": 14470 + }, + { + "epoch": 7.51, + "learning_rate": 0.000932596981836807, + "loss": 1.7424, + "step": 14480 + }, + { + "epoch": 7.52, + "learning_rate": 0.0009324455151185008, + "loss": 1.6428, + "step": 14490 + }, + { + "epoch": 7.52, + "learning_rate": 0.0009322938907334238, + "loss": 1.7389, + "step": 14500 + }, + { + "epoch": 7.53, + "learning_rate": 0.0009321421087368578, + "loss": 1.6418, + "step": 14510 + }, + { + "epoch": 7.54, + "learning_rate": 0.0009319901691841413, + "loss": 1.774, + "step": 14520 + }, + { + "epoch": 7.54, + "learning_rate": 0.0009318380721306703, + "loss": 1.7014, + "step": 14530 + }, + { + "epoch": 7.55, + "learning_rate": 0.0009316858176318985, + "loss": 1.6742, + "step": 14540 + }, + { + "epoch": 7.55, + "learning_rate": 0.000931533405743337, + "loss": 1.6953, + "step": 14550 + }, + { + "epoch": 7.56, + "learning_rate": 0.000931380836520554, + "loss": 1.6567, + "step": 14560 + }, + { + "epoch": 7.56, + "learning_rate": 0.0009312281100191752, + "loss": 1.7334, + "step": 14570 + }, + { + "epoch": 7.57, + "learning_rate": 0.0009310752262948838, + "loss": 1.8719, + "step": 14580 + }, + { + "epoch": 7.57, + "learning_rate": 0.0009309221854034202, + "loss": 1.8223, + "step": 14590 + }, + { + "epoch": 7.58, + "learning_rate": 0.000930768987400582, + "loss": 1.8442, + "step": 14600 + }, + { + "epoch": 7.58, + "learning_rate": 0.0009306156323422244, + "loss": 1.7612, + "step": 14610 + }, + { + "epoch": 7.59, + "learning_rate": 0.0009304621202842593, + "loss": 1.8426, + "step": 14620 + }, + { + "epoch": 7.59, + "learning_rate": 0.0009303084512826562, + "loss": 1.7055, + "step": 14630 + }, + { + "epoch": 7.6, + "learning_rate": 0.0009301546253934422, + "loss": 1.6697, + "step": 14640 + }, + { + "epoch": 7.6, + "learning_rate": 0.0009300006426727008, + "loss": 1.7494, + "step": 14650 + }, + { + "epoch": 7.61, + "learning_rate": 0.0009298465031765732, + "loss": 1.6976, + "step": 14660 + }, + { + "epoch": 7.61, + "learning_rate": 0.0009296922069612578, + "loss": 1.8057, + "step": 14670 + }, + { + "epoch": 7.62, + "learning_rate": 0.0009295377540830097, + "loss": 1.9329, + "step": 14680 + }, + { + "epoch": 7.62, + "learning_rate": 0.0009293831445981415, + "loss": 1.7457, + "step": 14690 + }, + { + "epoch": 7.63, + "learning_rate": 0.000929228378563023, + "loss": 1.7327, + "step": 14700 + }, + { + "epoch": 7.63, + "learning_rate": 0.0009290734560340806, + "loss": 1.8215, + "step": 14710 + }, + { + "epoch": 7.64, + "learning_rate": 0.0009289183770677981, + "loss": 1.5827, + "step": 14720 + }, + { + "epoch": 7.64, + "learning_rate": 0.0009287631417207163, + "loss": 1.54, + "step": 14730 + }, + { + "epoch": 7.65, + "learning_rate": 0.000928607750049433, + "loss": 1.7683, + "step": 14740 + }, + { + "epoch": 7.65, + "learning_rate": 0.000928452202110603, + "loss": 1.8053, + "step": 14750 + }, + { + "epoch": 7.66, + "learning_rate": 0.0009282964979609379, + "loss": 1.8536, + "step": 14760 + }, + { + "epoch": 7.66, + "learning_rate": 0.0009281406376572066, + "loss": 1.7002, + "step": 14770 + }, + { + "epoch": 7.67, + "learning_rate": 0.0009279846212562344, + "loss": 1.7629, + "step": 14780 + }, + { + "epoch": 7.68, + "learning_rate": 0.0009278284488149042, + "loss": 1.677, + "step": 14790 + }, + { + "epoch": 7.68, + "learning_rate": 0.0009276721203901555, + "loss": 1.7235, + "step": 14800 + }, + { + "epoch": 7.69, + "learning_rate": 0.0009275156360389842, + "loss": 1.6409, + "step": 14810 + }, + { + "epoch": 7.69, + "learning_rate": 0.0009273589958184437, + "loss": 1.7063, + "step": 14820 + }, + { + "epoch": 7.7, + "learning_rate": 0.000927202199785644, + "loss": 1.7886, + "step": 14830 + }, + { + "epoch": 7.7, + "learning_rate": 0.0009270452479977519, + "loss": 1.9833, + "step": 14840 + }, + { + "epoch": 7.71, + "learning_rate": 0.0009268881405119909, + "loss": 1.7168, + "step": 14850 + }, + { + "epoch": 7.71, + "learning_rate": 0.0009267308773856413, + "loss": 1.7221, + "step": 14860 + }, + { + "epoch": 7.72, + "learning_rate": 0.0009265734586760405, + "loss": 1.7475, + "step": 14870 + }, + { + "epoch": 7.72, + "learning_rate": 0.0009264158844405818, + "loss": 1.6445, + "step": 14880 + }, + { + "epoch": 7.73, + "learning_rate": 0.0009262581547367164, + "loss": 1.7001, + "step": 14890 + }, + { + "epoch": 7.73, + "learning_rate": 0.000926100269621951, + "loss": 1.8752, + "step": 14900 + }, + { + "epoch": 7.74, + "learning_rate": 0.0009259422291538496, + "loss": 1.6439, + "step": 14910 + }, + { + "epoch": 7.74, + "learning_rate": 0.0009257840333900325, + "loss": 1.7411, + "step": 14920 + }, + { + "epoch": 7.75, + "learning_rate": 0.0009256256823881772, + "loss": 1.5836, + "step": 14930 + }, + { + "epoch": 7.75, + "learning_rate": 0.0009254671762060171, + "loss": 1.7718, + "step": 14940 + }, + { + "epoch": 7.76, + "learning_rate": 0.0009253085149013429, + "loss": 1.7117, + "step": 14950 + }, + { + "epoch": 7.76, + "learning_rate": 0.0009251496985320011, + "loss": 1.8424, + "step": 14960 + }, + { + "epoch": 7.77, + "learning_rate": 0.0009249907271558953, + "loss": 1.7725, + "step": 14970 + }, + { + "epoch": 7.77, + "learning_rate": 0.0009248316008309854, + "loss": 1.6359, + "step": 14980 + }, + { + "epoch": 7.78, + "learning_rate": 0.0009246723196152876, + "loss": 1.8313, + "step": 14990 + }, + { + "epoch": 7.78, + "learning_rate": 0.000924512883566875, + "loss": 1.8976, + "step": 15000 + }, + { + "epoch": 7.79, + "learning_rate": 0.0009243532927438767, + "loss": 1.7728, + "step": 15010 + }, + { + "epoch": 7.79, + "learning_rate": 0.0009241935472044787, + "loss": 1.8352, + "step": 15020 + }, + { + "epoch": 7.8, + "learning_rate": 0.0009240336470069231, + "loss": 1.7677, + "step": 15030 + }, + { + "epoch": 7.8, + "learning_rate": 0.0009238735922095083, + "loss": 1.9427, + "step": 15040 + }, + { + "epoch": 7.81, + "learning_rate": 0.0009237133828705893, + "loss": 1.8624, + "step": 15050 + }, + { + "epoch": 7.82, + "learning_rate": 0.0009235530190485776, + "loss": 1.8505, + "step": 15060 + }, + { + "epoch": 7.82, + "learning_rate": 0.0009233925008019405, + "loss": 1.827, + "step": 15070 + }, + { + "epoch": 7.83, + "learning_rate": 0.000923231828189202, + "loss": 1.8163, + "step": 15080 + }, + { + "epoch": 7.83, + "learning_rate": 0.0009230710012689421, + "loss": 1.8301, + "step": 15090 + }, + { + "epoch": 7.84, + "learning_rate": 0.0009229100200997976, + "loss": 1.7922, + "step": 15100 + }, + { + "epoch": 7.84, + "learning_rate": 0.0009227488847404608, + "loss": 1.7587, + "step": 15110 + }, + { + "epoch": 7.85, + "learning_rate": 0.0009225875952496807, + "loss": 1.9375, + "step": 15120 + }, + { + "epoch": 7.85, + "learning_rate": 0.0009224261516862625, + "loss": 1.8747, + "step": 15130 + }, + { + "epoch": 7.86, + "learning_rate": 0.0009222645541090673, + "loss": 1.9106, + "step": 15140 + }, + { + "epoch": 7.86, + "learning_rate": 0.0009221028025770126, + "loss": 1.8543, + "step": 15150 + }, + { + "epoch": 7.87, + "learning_rate": 0.0009219408971490719, + "loss": 1.8744, + "step": 15160 + }, + { + "epoch": 7.87, + "learning_rate": 0.0009217788378842748, + "loss": 1.8751, + "step": 15170 + }, + { + "epoch": 7.88, + "learning_rate": 0.0009216166248417072, + "loss": 1.7944, + "step": 15180 + }, + { + "epoch": 7.88, + "learning_rate": 0.0009214542580805108, + "loss": 1.8621, + "step": 15190 + }, + { + "epoch": 7.89, + "learning_rate": 0.0009212917376598832, + "loss": 1.8727, + "step": 15200 + }, + { + "epoch": 7.89, + "learning_rate": 0.0009211290636390787, + "loss": 1.8287, + "step": 15210 + }, + { + "epoch": 7.9, + "learning_rate": 0.0009209662360774071, + "loss": 1.8699, + "step": 15220 + }, + { + "epoch": 7.9, + "learning_rate": 0.000920803255034234, + "loss": 1.8996, + "step": 15230 + }, + { + "epoch": 7.91, + "learning_rate": 0.0009206401205689811, + "loss": 1.8532, + "step": 15240 + }, + { + "epoch": 7.91, + "learning_rate": 0.0009204768327411267, + "loss": 1.8261, + "step": 15250 + }, + { + "epoch": 7.92, + "learning_rate": 0.000920313391610204, + "loss": 1.8188, + "step": 15260 + }, + { + "epoch": 7.92, + "learning_rate": 0.0009201497972358028, + "loss": 1.7931, + "step": 15270 + }, + { + "epoch": 7.93, + "learning_rate": 0.0009199860496775683, + "loss": 1.6533, + "step": 15280 + }, + { + "epoch": 7.93, + "learning_rate": 0.0009198221489952019, + "loss": 1.705, + "step": 15290 + }, + { + "epoch": 7.94, + "learning_rate": 0.0009196580952484607, + "loss": 1.6474, + "step": 15300 + }, + { + "epoch": 7.94, + "learning_rate": 0.0009194938884971576, + "loss": 1.7114, + "step": 15310 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009193295288011614, + "loss": 1.9186, + "step": 15320 + }, + { + "epoch": 7.96, + "learning_rate": 0.0009191650162203963, + "loss": 1.795, + "step": 15330 + }, + { + "epoch": 7.96, + "learning_rate": 0.0009190003508148428, + "loss": 1.6633, + "step": 15340 + }, + { + "epoch": 7.97, + "learning_rate": 0.0009188355326445366, + "loss": 1.7603, + "step": 15350 + }, + { + "epoch": 7.97, + "learning_rate": 0.0009186705617695695, + "loss": 1.5689, + "step": 15360 + }, + { + "epoch": 7.98, + "learning_rate": 0.0009185054382500884, + "loss": 1.8437, + "step": 15370 + }, + { + "epoch": 7.98, + "learning_rate": 0.0009183401621462966, + "loss": 1.8561, + "step": 15380 + }, + { + "epoch": 7.99, + "learning_rate": 0.0009181747335184527, + "loss": 1.754, + "step": 15390 + }, + { + "epoch": 7.99, + "learning_rate": 0.0009180091524268707, + "loss": 1.7165, + "step": 15400 + }, + { + "epoch": 8.0, + "learning_rate": 0.0009178434189319205, + "loss": 1.578, + "step": 15410 + }, + { + "epoch": 8.0, + "eval_accuracy": { + "accuracy": 0.2827496757457847 + }, + "eval_f1": { + "f1": 0.20468330816120264 + }, + "eval_loss": 1.6264866590499878, + "eval_precision": { + "precision": 0.23605832742279897 + }, + "eval_recall": { + "recall": 0.2983260879145022 + }, + "eval_runtime": 106.6464, + "eval_samples_per_second": 36.148, + "eval_steps_per_second": 18.078, + "step": 15416 + } + ], + "logging_steps": 10, + "max_steps": 57810, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 7.111502715796586e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}