diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,61427 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.8739594690479899, - "eval_steps": 500, - "global_step": 50500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 5.999999999999999e-06, - "loss": 11.0832, - "step": 5 - }, - { - "epoch": 0.0, - "learning_rate": 1.1999999999999999e-05, - "loss": 10.9995, - "step": 10 - }, - { - "epoch": 0.0, - "learning_rate": 1.7999999999999997e-05, - "loss": 10.8287, - "step": 15 - }, - { - "epoch": 0.0, - "learning_rate": 2.3999999999999997e-05, - "loss": 10.5347, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 2.9999999999999997e-05, - "loss": 10.1317, - "step": 25 - }, - { - "epoch": 0.0, - "learning_rate": 3.5999999999999994e-05, - "loss": 9.7568, - "step": 30 - }, - { - "epoch": 0.0, - "learning_rate": 4.2e-05, - "loss": 9.4066, - "step": 35 - }, - { - "epoch": 0.0, - "learning_rate": 4.7999999999999994e-05, - "loss": 9.0828, - "step": 40 - }, - { - "epoch": 0.0, - "learning_rate": 5.399999999999999e-05, - "loss": 8.7974, - "step": 45 - }, - { - "epoch": 0.0, - "learning_rate": 5.9999999999999995e-05, - "loss": 8.4785, - "step": 50 - }, - { - "epoch": 0.0, - "learning_rate": 6.599999999999999e-05, - "loss": 8.2844, - "step": 55 - }, - { - "epoch": 0.0, - "learning_rate": 7.199999999999999e-05, - "loss": 8.1159, - "step": 60 - }, - { - "epoch": 0.0, - "learning_rate": 7.8e-05, - "loss": 8.04, - "step": 65 - }, - { - "epoch": 0.0, - "learning_rate": 8.4e-05, - "loss": 8.0479, - "step": 70 - }, - { - "epoch": 0.0, - "learning_rate": 8.999999999999999e-05, - "loss": 7.785, - "step": 75 - }, - { - "epoch": 0.0, - "learning_rate": 9.599999999999999e-05, - "loss": 7.8489, - "step": 80 - }, - { - "epoch": 0.0, - "learning_rate": 0.000102, - "loss": 7.7499, - "step": 85 - }, - { - "epoch": 0.0, - "learning_rate": 0.00010799999999999998, - "loss": 7.596, - "step": 90 - }, - { - "epoch": 0.0, - "learning_rate": 0.00011399999999999999, - "loss": 7.6188, - "step": 95 - }, - { - "epoch": 0.0, - "learning_rate": 0.00011999999999999999, - "loss": 7.5564, - "step": 100 - }, - { - "epoch": 0.0, - "learning_rate": 0.00012599999999999997, - "loss": 7.5576, - "step": 105 - }, - { - "epoch": 0.0, - "learning_rate": 0.00013199999999999998, - "loss": 7.5225, - "step": 110 - }, - { - "epoch": 0.0, - "learning_rate": 0.000138, - "loss": 7.4858, - "step": 115 - }, - { - "epoch": 0.0, - "learning_rate": 0.00014399999999999998, - "loss": 7.2874, - "step": 120 - }, - { - "epoch": 0.0, - "learning_rate": 0.00015, - "loss": 7.3418, - "step": 125 - }, - { - "epoch": 0.0, - "learning_rate": 0.000156, - "loss": 7.2647, - "step": 130 - }, - { - "epoch": 0.0, - "learning_rate": 0.000162, - "loss": 7.1983, - "step": 135 - }, - { - "epoch": 0.0, - "learning_rate": 0.000168, - "loss": 7.0973, - "step": 140 - }, - { - "epoch": 0.0, - "learning_rate": 0.00017399999999999997, - "loss": 7.1693, - "step": 145 - }, - { - "epoch": 0.0, - "learning_rate": 0.00017999999999999998, - "loss": 7.1801, - "step": 150 - }, - { - "epoch": 0.0, - "learning_rate": 0.000186, - "loss": 7.0995, - "step": 155 - }, - { - "epoch": 0.0, - "learning_rate": 0.00019199999999999998, - "loss": 6.9925, - "step": 160 - }, - { - "epoch": 0.0, - "learning_rate": 0.000198, - "loss": 6.9763, - "step": 165 - }, - { - "epoch": 0.0, - "learning_rate": 0.000204, - "loss": 6.9261, - "step": 170 - }, - { - "epoch": 0.0, - "learning_rate": 0.00020999999999999998, - "loss": 6.9174, - "step": 175 - }, - { - "epoch": 0.0, - "learning_rate": 0.00021599999999999996, - "loss": 6.9063, - "step": 180 - }, - { - "epoch": 0.0, - "learning_rate": 0.00022199999999999998, - "loss": 6.8622, - "step": 185 - }, - { - "epoch": 0.0, - "learning_rate": 0.00022799999999999999, - "loss": 6.897, - "step": 190 - }, - { - "epoch": 0.0, - "learning_rate": 0.000234, - "loss": 6.8112, - "step": 195 - }, - { - "epoch": 0.0, - "learning_rate": 0.00023999999999999998, - "loss": 6.7515, - "step": 200 - }, - { - "epoch": 0.0, - "learning_rate": 0.00024599999999999996, - "loss": 6.7586, - "step": 205 - }, - { - "epoch": 0.0, - "learning_rate": 0.00025199999999999995, - "loss": 6.5961, - "step": 210 - }, - { - "epoch": 0.0, - "learning_rate": 0.000258, - "loss": 6.6598, - "step": 215 - }, - { - "epoch": 0.0, - "learning_rate": 0.00026399999999999997, - "loss": 6.6071, - "step": 220 - }, - { - "epoch": 0.0, - "learning_rate": 0.00027, - "loss": 6.5918, - "step": 225 - }, - { - "epoch": 0.0, - "learning_rate": 0.000276, - "loss": 6.539, - "step": 230 - }, - { - "epoch": 0.0, - "learning_rate": 0.00028199999999999997, - "loss": 6.4539, - "step": 235 - }, - { - "epoch": 0.0, - "learning_rate": 0.00028799999999999995, - "loss": 6.4418, - "step": 240 - }, - { - "epoch": 0.0, - "learning_rate": 0.000294, - "loss": 6.5303, - "step": 245 - }, - { - "epoch": 0.0, - "learning_rate": 0.0003, - "loss": 6.4021, - "step": 250 - }, - { - "epoch": 0.0, - "learning_rate": 0.00030599999999999996, - "loss": 6.4768, - "step": 255 - }, - { - "epoch": 0.0, - "learning_rate": 0.000312, - "loss": 6.3929, - "step": 260 - }, - { - "epoch": 0.0, - "learning_rate": 0.000318, - "loss": 6.3891, - "step": 265 - }, - { - "epoch": 0.0, - "learning_rate": 0.000324, - "loss": 6.4381, - "step": 270 - }, - { - "epoch": 0.0, - "learning_rate": 0.00033, - "loss": 6.3544, - "step": 275 - }, - { - "epoch": 0.0, - "learning_rate": 0.000336, - "loss": 6.355, - "step": 280 - }, - { - "epoch": 0.0, - "learning_rate": 0.00034199999999999996, - "loss": 6.3141, - "step": 285 - }, - { - "epoch": 0.01, - "learning_rate": 0.00034799999999999995, - "loss": 6.2884, - "step": 290 - }, - { - "epoch": 0.01, - "learning_rate": 0.00035399999999999993, - "loss": 6.3167, - "step": 295 - }, - { - "epoch": 0.01, - "learning_rate": 0.00035999999999999997, - "loss": 6.2576, - "step": 300 - }, - { - "epoch": 0.01, - "learning_rate": 0.00036599999999999995, - "loss": 6.2405, - "step": 305 - }, - { - "epoch": 0.01, - "learning_rate": 0.000372, - "loss": 6.2546, - "step": 310 - }, - { - "epoch": 0.01, - "learning_rate": 0.00037799999999999997, - "loss": 6.1438, - "step": 315 - }, - { - "epoch": 0.01, - "learning_rate": 0.00038399999999999996, - "loss": 6.1532, - "step": 320 - }, - { - "epoch": 0.01, - "learning_rate": 0.00039, - "loss": 6.1473, - "step": 325 - }, - { - "epoch": 0.01, - "learning_rate": 0.000396, - "loss": 6.2154, - "step": 330 - }, - { - "epoch": 0.01, - "learning_rate": 0.000402, - "loss": 6.0882, - "step": 335 - }, - { - "epoch": 0.01, - "learning_rate": 0.000408, - "loss": 6.2324, - "step": 340 - }, - { - "epoch": 0.01, - "learning_rate": 0.0004139999999999999, - "loss": 6.2041, - "step": 345 - }, - { - "epoch": 0.01, - "learning_rate": 0.00041999999999999996, - "loss": 6.0757, - "step": 350 - }, - { - "epoch": 0.01, - "learning_rate": 0.00042599999999999995, - "loss": 6.0776, - "step": 355 - }, - { - "epoch": 0.01, - "learning_rate": 0.00043199999999999993, - "loss": 6.032, - "step": 360 - }, - { - "epoch": 0.01, - "learning_rate": 0.00043799999999999997, - "loss": 6.1352, - "step": 365 - }, - { - "epoch": 0.01, - "learning_rate": 0.00044399999999999995, - "loss": 5.9888, - "step": 370 - }, - { - "epoch": 0.01, - "learning_rate": 0.00045, - "loss": 6.0221, - "step": 375 - }, - { - "epoch": 0.01, - "learning_rate": 0.00045599999999999997, - "loss": 6.0419, - "step": 380 - }, - { - "epoch": 0.01, - "learning_rate": 0.00046199999999999995, - "loss": 5.9985, - "step": 385 - }, - { - "epoch": 0.01, - "learning_rate": 0.000468, - "loss": 5.8769, - "step": 390 - }, - { - "epoch": 0.01, - "learning_rate": 0.000474, - "loss": 5.9902, - "step": 395 - }, - { - "epoch": 0.01, - "learning_rate": 0.00047999999999999996, - "loss": 5.9365, - "step": 400 - }, - { - "epoch": 0.01, - "learning_rate": 0.000486, - "loss": 5.895, - "step": 405 - }, - { - "epoch": 0.01, - "learning_rate": 0.0004919999999999999, - "loss": 5.9953, - "step": 410 - }, - { - "epoch": 0.01, - "learning_rate": 0.000498, - "loss": 5.8948, - "step": 415 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005039999999999999, - "loss": 5.9516, - "step": 420 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005099999999999999, - "loss": 5.8119, - "step": 425 - }, - { - "epoch": 0.01, - "learning_rate": 0.000516, - "loss": 5.8849, - "step": 430 - }, - { - "epoch": 0.01, - "learning_rate": 0.000522, - "loss": 5.8657, - "step": 435 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005279999999999999, - "loss": 5.7593, - "step": 440 - }, - { - "epoch": 0.01, - "learning_rate": 0.000534, - "loss": 5.8779, - "step": 445 - }, - { - "epoch": 0.01, - "learning_rate": 0.00054, - "loss": 5.8816, - "step": 450 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005459999999999999, - "loss": 5.813, - "step": 455 - }, - { - "epoch": 0.01, - "learning_rate": 0.000552, - "loss": 5.8118, - "step": 460 - }, - { - "epoch": 0.01, - "learning_rate": 0.000558, - "loss": 5.7594, - "step": 465 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005639999999999999, - "loss": 5.8195, - "step": 470 - }, - { - "epoch": 0.01, - "learning_rate": 0.00057, - "loss": 5.7352, - "step": 475 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005759999999999999, - "loss": 5.6983, - "step": 480 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005819999999999999, - "loss": 5.7638, - "step": 485 - }, - { - "epoch": 0.01, - "learning_rate": 0.000588, - "loss": 5.8061, - "step": 490 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005939999999999999, - "loss": 5.8155, - "step": 495 - }, - { - "epoch": 0.01, - "learning_rate": 0.0006, - "loss": 5.667, - "step": 500 - }, - { - "epoch": 0.01, - "eval_loss": 5.787242889404297, - "eval_runtime": 151.0777, - "eval_samples_per_second": 12.186, - "eval_steps_per_second": 0.768, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999999887207687, - "loss": 5.7718, - "step": 505 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999999548830758, - "loss": 5.673, - "step": 510 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999998984869237, - "loss": 5.6713, - "step": 515 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999998195323167, - "loss": 5.6457, - "step": 520 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999997180192608, - "loss": 5.7369, - "step": 525 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999995939477636, - "loss": 5.7537, - "step": 530 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999994473178343, - "loss": 5.721, - "step": 535 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999992781294842, - "loss": 5.6126, - "step": 540 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999990863827257, - "loss": 5.5628, - "step": 545 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999988720775734, - "loss": 5.5213, - "step": 550 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999986352140434, - "loss": 5.5386, - "step": 555 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999983757921536, - "loss": 5.6003, - "step": 560 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999980938119233, - "loss": 5.5606, - "step": 565 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999977892733739, - "loss": 5.6028, - "step": 570 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999974621765282, - "loss": 5.6032, - "step": 575 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999971125214107, - "loss": 5.5227, - "step": 580 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999967403080478, - "loss": 5.5631, - "step": 585 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999963455364676, - "loss": 5.5767, - "step": 590 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999959282066996, - "loss": 5.5166, - "step": 595 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999954883187754, - "loss": 5.5309, - "step": 600 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999950258727278, - "loss": 5.4728, - "step": 605 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999945408685917, - "loss": 5.4537, - "step": 610 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999940333064035, - "loss": 5.4461, - "step": 615 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999935031862015, - "loss": 5.4692, - "step": 620 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999929505080256, - "loss": 5.4129, - "step": 625 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999923752719172, - "loss": 5.4537, - "step": 630 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999917774779196, - "loss": 5.3996, - "step": 635 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999911571260778, - "loss": 5.4168, - "step": 640 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999905142164384, - "loss": 5.4616, - "step": 645 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999898487490498, - "loss": 5.4191, - "step": 650 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999891607239619, - "loss": 5.3498, - "step": 655 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999884501412266, - "loss": 5.5168, - "step": 660 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999877170008972, - "loss": 5.4529, - "step": 665 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999869613030289, - "loss": 5.4247, - "step": 670 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999861830476786, - "loss": 5.3704, - "step": 675 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999853822349047, - "loss": 5.314, - "step": 680 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999845588647674, - "loss": 5.263, - "step": 685 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999837129373288, - "loss": 5.3963, - "step": 690 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999828444526523, - "loss": 5.3916, - "step": 695 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999819534108033, - "loss": 5.3184, - "step": 700 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999810398118487, - "loss": 5.4009, - "step": 705 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999801036558574, - "loss": 5.3957, - "step": 710 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999791449428996, - "loss": 5.2775, - "step": 715 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999781636730476, - "loss": 5.2826, - "step": 720 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999771598463749, - "loss": 5.2916, - "step": 725 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999761334629572, - "loss": 5.3436, - "step": 730 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999750845228717, - "loss": 5.3674, - "step": 735 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999740130261971, - "loss": 5.2603, - "step": 740 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999729189730139, - "loss": 5.2142, - "step": 745 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999718023634048, - "loss": 5.3698, - "step": 750 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999706631974534, - "loss": 5.2052, - "step": 755 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999695014752454, - "loss": 5.2727, - "step": 760 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999683171968683, - "loss": 5.2406, - "step": 765 - }, - { - "epoch": 0.01, - "learning_rate": 0.000599967110362411, - "loss": 5.2282, - "step": 770 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999658809719644, - "loss": 5.1632, - "step": 775 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999646290256208, - "loss": 5.1701, - "step": 780 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999633545234743, - "loss": 5.1785, - "step": 785 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999620574656209, - "loss": 5.2017, - "step": 790 - }, - { - "epoch": 0.01, - "learning_rate": 0.000599960737852158, - "loss": 5.2983, - "step": 795 - }, - { - "epoch": 0.01, - "learning_rate": 0.000599959395683185, - "loss": 5.1982, - "step": 800 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999580309588026, - "loss": 5.1416, - "step": 805 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999566436791137, - "loss": 5.1912, - "step": 810 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999552338442223, - "loss": 5.2655, - "step": 815 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999538014542347, - "loss": 5.188, - "step": 820 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999523465092583, - "loss": 5.1928, - "step": 825 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999508690094028, - "loss": 5.1108, - "step": 830 - }, - { - "epoch": 0.01, - "learning_rate": 0.000599949368954779, - "loss": 5.1586, - "step": 835 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999478463455001, - "loss": 5.1363, - "step": 840 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999463011816801, - "loss": 5.142, - "step": 845 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999447334634358, - "loss": 5.1496, - "step": 850 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999431431908844, - "loss": 5.052, - "step": 855 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999415303641459, - "loss": 5.0762, - "step": 860 - }, - { - "epoch": 0.01, - "learning_rate": 0.0005999398949833416, - "loss": 5.1514, - "step": 865 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999382370485942, - "loss": 5.1176, - "step": 870 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999365565600288, - "loss": 5.1016, - "step": 875 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999348535177713, - "loss": 5.1446, - "step": 880 - }, - { - "epoch": 0.02, - "learning_rate": 0.00059993312792195, - "loss": 4.9957, - "step": 885 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999313797726947, - "loss": 5.1482, - "step": 890 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999296090701366, - "loss": 5.0768, - "step": 895 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999278158144091, - "loss": 5.2004, - "step": 900 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999260000056469, - "loss": 5.0756, - "step": 905 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999241616439867, - "loss": 5.1679, - "step": 910 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999223007295666, - "loss": 5.0844, - "step": 915 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999204172625265, - "loss": 5.1273, - "step": 920 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999185112430082, - "loss": 5.0067, - "step": 925 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999165826711547, - "loss": 5.0033, - "step": 930 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999146315471115, - "loss": 5.0368, - "step": 935 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999126578710249, - "loss": 5.0261, - "step": 940 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999106616430435, - "loss": 5.0727, - "step": 945 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999086428633173, - "loss": 5.0198, - "step": 950 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999066015319981, - "loss": 5.0701, - "step": 955 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999045376492396, - "loss": 5.0427, - "step": 960 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999024512151967, - "loss": 5.1675, - "step": 965 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005999003422300267, - "loss": 5.0997, - "step": 970 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998982106938878, - "loss": 5.1069, - "step": 975 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998960566069404, - "loss": 5.0461, - "step": 980 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998938799693464, - "loss": 5.0177, - "step": 985 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998916807812696, - "loss": 4.9937, - "step": 990 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998894590428755, - "loss": 5.0079, - "step": 995 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998872147543309, - "loss": 4.9904, - "step": 1000 - }, - { - "epoch": 0.02, - "eval_loss": 5.063974857330322, - "eval_runtime": 150.7726, - "eval_samples_per_second": 12.21, - "eval_steps_per_second": 0.769, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998849479158045, - "loss": 5.0673, - "step": 1005 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998826585274671, - "loss": 4.9611, - "step": 1010 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998803465894905, - "loss": 5.0122, - "step": 1015 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998780121020487, - "loss": 5.0227, - "step": 1020 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998756550653174, - "loss": 4.9941, - "step": 1025 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998732754794736, - "loss": 5.0426, - "step": 1030 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998708733446962, - "loss": 4.9343, - "step": 1035 - }, - { - "epoch": 0.02, - "learning_rate": 0.000599868448661166, - "loss": 5.0083, - "step": 1040 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998660014290653, - "loss": 5.0275, - "step": 1045 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998635316485781, - "loss": 5.0044, - "step": 1050 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998610393198902, - "loss": 5.0977, - "step": 1055 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998585244431887, - "loss": 4.9482, - "step": 1060 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998559870186629, - "loss": 5.007, - "step": 1065 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998534270465038, - "loss": 4.9826, - "step": 1070 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998508445269035, - "loss": 4.9924, - "step": 1075 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998482394600566, - "loss": 4.9346, - "step": 1080 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998456118461587, - "loss": 4.9776, - "step": 1085 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998429616854074, - "loss": 5.0336, - "step": 1090 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998402889780021, - "loss": 4.9965, - "step": 1095 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998375937241438, - "loss": 4.9999, - "step": 1100 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998348759240351, - "loss": 4.9897, - "step": 1105 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998321355778803, - "loss": 4.904, - "step": 1110 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998293726858855, - "loss": 5.0172, - "step": 1115 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998265872482585, - "loss": 4.9544, - "step": 1120 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998237792652088, - "loss": 4.9249, - "step": 1125 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998209487369473, - "loss": 5.0203, - "step": 1130 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998180956636872, - "loss": 4.9552, - "step": 1135 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998152200456427, - "loss": 4.9835, - "step": 1140 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998123218830302, - "loss": 4.9234, - "step": 1145 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998094011760677, - "loss": 4.9115, - "step": 1150 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998064579249748, - "loss": 4.914, - "step": 1155 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998034921299726, - "loss": 4.9211, - "step": 1160 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005998005037912842, - "loss": 4.9112, - "step": 1165 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997974929091345, - "loss": 4.9139, - "step": 1170 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997944594837497, - "loss": 4.9096, - "step": 1175 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997914035153581, - "loss": 4.8065, - "step": 1180 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997883250041892, - "loss": 4.9562, - "step": 1185 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997852239504747, - "loss": 4.939, - "step": 1190 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997821003544478, - "loss": 4.8459, - "step": 1195 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997789542163432, - "loss": 4.8906, - "step": 1200 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997757855363976, - "loss": 4.8547, - "step": 1205 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997725943148494, - "loss": 4.9617, - "step": 1210 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997693805519383, - "loss": 4.8849, - "step": 1215 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997661442479061, - "loss": 4.8716, - "step": 1220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997628854029961, - "loss": 4.83, - "step": 1225 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997596040174534, - "loss": 4.8464, - "step": 1230 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997563000915248, - "loss": 4.8273, - "step": 1235 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997529736254585, - "loss": 4.8679, - "step": 1240 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997496246195049, - "loss": 4.8066, - "step": 1245 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997462530739157, - "loss": 4.7826, - "step": 1250 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997428589889444, - "loss": 4.8587, - "step": 1255 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997394423648463, - "loss": 4.8974, - "step": 1260 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997360032018783, - "loss": 4.8758, - "step": 1265 - }, - { - "epoch": 0.02, - "learning_rate": 0.000599732541500299, - "loss": 4.8788, - "step": 1270 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997290572603687, - "loss": 4.8967, - "step": 1275 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997255504823492, - "loss": 4.8774, - "step": 1280 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997220211665045, - "loss": 4.8542, - "step": 1285 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997184693130999, - "loss": 4.7275, - "step": 1290 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997148949224023, - "loss": 4.807, - "step": 1295 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997112979946809, - "loss": 4.8433, - "step": 1300 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997076785302057, - "loss": 4.9526, - "step": 1305 - }, - { - "epoch": 0.02, - "learning_rate": 0.000599704036529249, - "loss": 4.8017, - "step": 1310 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005997003719920847, - "loss": 4.8443, - "step": 1315 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996966849189884, - "loss": 4.823, - "step": 1320 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996929753102374, - "loss": 4.8656, - "step": 1325 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996892431661104, - "loss": 4.7528, - "step": 1330 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996854884868884, - "loss": 4.804, - "step": 1335 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996817112728534, - "loss": 4.8494, - "step": 1340 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996779115242896, - "loss": 4.8679, - "step": 1345 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996740892414827, - "loss": 4.7677, - "step": 1350 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996702444247201, - "loss": 4.7851, - "step": 1355 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996663770742908, - "loss": 4.8101, - "step": 1360 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996624871904858, - "loss": 4.8664, - "step": 1365 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996585747735975, - "loss": 4.8919, - "step": 1370 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996546398239201, - "loss": 4.7878, - "step": 1375 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996506823417495, - "loss": 4.8748, - "step": 1380 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996467023273832, - "loss": 4.8132, - "step": 1385 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996426997811207, - "loss": 4.8323, - "step": 1390 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996386747032627, - "loss": 4.7567, - "step": 1395 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996346270941119, - "loss": 4.7993, - "step": 1400 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996305569539728, - "loss": 4.7315, - "step": 1405 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996264642831516, - "loss": 4.8493, - "step": 1410 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996223490819556, - "loss": 4.7694, - "step": 1415 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996182113506947, - "loss": 4.7332, - "step": 1420 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996140510896796, - "loss": 4.8223, - "step": 1425 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996098682992235, - "loss": 4.7319, - "step": 1430 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996056629796408, - "loss": 4.8006, - "step": 1435 - }, - { - "epoch": 0.02, - "learning_rate": 0.0005996014351312476, - "loss": 4.844, - "step": 1440 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995971847543621, - "loss": 4.7525, - "step": 1445 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995929118493034, - "loss": 4.7649, - "step": 1450 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995886164163934, - "loss": 4.7703, - "step": 1455 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995842984559547, - "loss": 4.8856, - "step": 1460 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995799579683121, - "loss": 4.7844, - "step": 1465 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995755949537919, - "loss": 4.7843, - "step": 1470 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995712094127224, - "loss": 4.7452, - "step": 1475 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995668013454332, - "loss": 4.7533, - "step": 1480 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995623707522558, - "loss": 4.7856, - "step": 1485 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995579176335233, - "loss": 4.8798, - "step": 1490 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995534419895707, - "loss": 4.7446, - "step": 1495 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995489438207344, - "loss": 4.7522, - "step": 1500 - }, - { - "epoch": 0.03, - "eval_loss": 4.799483776092529, - "eval_runtime": 150.7804, - "eval_samples_per_second": 12.21, - "eval_steps_per_second": 0.769, - "step": 1500 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995444231273526, - "loss": 4.6847, - "step": 1505 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995398799097654, - "loss": 4.846, - "step": 1510 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995353141683144, - "loss": 4.7079, - "step": 1515 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995307259033428, - "loss": 4.7785, - "step": 1520 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995261151151957, - "loss": 4.8516, - "step": 1525 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995214818042198, - "loss": 4.7627, - "step": 1530 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995168259707635, - "loss": 4.7492, - "step": 1535 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995121476151769, - "loss": 4.767, - "step": 1540 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995074467378117, - "loss": 4.7328, - "step": 1545 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005995027233390215, - "loss": 4.622, - "step": 1550 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994979774191614, - "loss": 4.7061, - "step": 1555 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994932089785883, - "loss": 4.7718, - "step": 1560 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994884180176608, - "loss": 4.8252, - "step": 1565 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994836045367391, - "loss": 4.7833, - "step": 1570 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994787685361852, - "loss": 4.7973, - "step": 1575 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994739100163627, - "loss": 4.8151, - "step": 1580 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994690289776368, - "loss": 4.6815, - "step": 1585 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994641254203749, - "loss": 4.6592, - "step": 1590 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994591993449455, - "loss": 4.6454, - "step": 1595 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994542507517188, - "loss": 4.7652, - "step": 1600 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994492796410673, - "loss": 4.6894, - "step": 1605 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994442860133647, - "loss": 4.7193, - "step": 1610 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994392698689862, - "loss": 4.6321, - "step": 1615 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994342312083093, - "loss": 4.7376, - "step": 1620 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994291700317127, - "loss": 4.7212, - "step": 1625 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994240863395772, - "loss": 4.6763, - "step": 1630 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994189801322849, - "loss": 4.6659, - "step": 1635 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994138514102198, - "loss": 4.6895, - "step": 1640 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994087001737674, - "loss": 4.7019, - "step": 1645 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005994035264233154, - "loss": 4.6882, - "step": 1650 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993983301592525, - "loss": 4.724, - "step": 1655 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993931113819697, - "loss": 4.6239, - "step": 1660 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993878700918592, - "loss": 4.5808, - "step": 1665 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993826062893152, - "loss": 4.6235, - "step": 1670 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993773199747335, - "loss": 4.6813, - "step": 1675 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993720111485115, - "loss": 4.6841, - "step": 1680 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993666798110488, - "loss": 4.6773, - "step": 1685 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993613259627458, - "loss": 4.6088, - "step": 1690 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993559496040054, - "loss": 4.592, - "step": 1695 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993505507352317, - "loss": 4.7731, - "step": 1700 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993451293568307, - "loss": 4.7196, - "step": 1705 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993396854692102, - "loss": 4.6916, - "step": 1710 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993342190727793, - "loss": 4.6838, - "step": 1715 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993287301679494, - "loss": 4.6393, - "step": 1720 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993232187551328, - "loss": 4.705, - "step": 1725 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993176848347444, - "loss": 4.6213, - "step": 1730 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993121284071999, - "loss": 4.7283, - "step": 1735 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993065494729173, - "loss": 4.6904, - "step": 1740 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005993009480323162, - "loss": 4.7068, - "step": 1745 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992953240858177, - "loss": 4.6864, - "step": 1750 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992896776338446, - "loss": 4.6971, - "step": 1755 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992840086768216, - "loss": 4.7385, - "step": 1760 - }, - { - "epoch": 0.03, - "learning_rate": 0.000599278317215175, - "loss": 4.6804, - "step": 1765 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992726032493326, - "loss": 4.7191, - "step": 1770 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992668667797244, - "loss": 4.5827, - "step": 1775 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992611078067813, - "loss": 4.7553, - "step": 1780 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992553263309367, - "loss": 4.6555, - "step": 1785 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992495223526253, - "loss": 4.5248, - "step": 1790 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992436958722834, - "loss": 4.4933, - "step": 1795 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992378468903489, - "loss": 4.622, - "step": 1800 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992319754072622, - "loss": 4.5995, - "step": 1805 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992260814234644, - "loss": 4.6473, - "step": 1810 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992201649393989, - "loss": 4.6518, - "step": 1815 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992142259555103, - "loss": 4.6183, - "step": 1820 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992082644722454, - "loss": 4.5708, - "step": 1825 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005992022804900525, - "loss": 4.5672, - "step": 1830 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991962740093813, - "loss": 4.5723, - "step": 1835 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991902450306839, - "loss": 4.574, - "step": 1840 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991841935544133, - "loss": 4.6351, - "step": 1845 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991781195810246, - "loss": 4.6235, - "step": 1850 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991720231109745, - "loss": 4.6356, - "step": 1855 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991659041447217, - "loss": 4.6934, - "step": 1860 - }, - { - "epoch": 0.03, - "learning_rate": 0.000599159762682726, - "loss": 4.6972, - "step": 1865 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991535987254494, - "loss": 4.5976, - "step": 1870 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991474122733552, - "loss": 4.5597, - "step": 1875 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991412033269087, - "loss": 4.552, - "step": 1880 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991349718865768, - "loss": 4.5887, - "step": 1885 - }, - { - "epoch": 0.03, - "learning_rate": 0.000599128717952828, - "loss": 4.6184, - "step": 1890 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991224415261328, - "loss": 4.6031, - "step": 1895 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991161426069629, - "loss": 4.6395, - "step": 1900 - }, - { - "epoch": 0.03, - "learning_rate": 0.000599109821195792, - "loss": 4.6755, - "step": 1905 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005991034772930955, - "loss": 4.5333, - "step": 1910 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990971108993503, - "loss": 4.5863, - "step": 1915 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990907220150353, - "loss": 4.5263, - "step": 1920 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990843106406308, - "loss": 4.5766, - "step": 1925 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990778767766188, - "loss": 4.6123, - "step": 1930 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990714204234833, - "loss": 4.5906, - "step": 1935 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990649415817097, - "loss": 4.592, - "step": 1940 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990584402517853, - "loss": 4.5306, - "step": 1945 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990519164341987, - "loss": 4.538, - "step": 1950 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990453701294406, - "loss": 4.7278, - "step": 1955 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990388013380033, - "loss": 4.5994, - "step": 1960 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990322100603806, - "loss": 4.6766, - "step": 1965 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990255962970684, - "loss": 4.5969, - "step": 1970 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990189600485637, - "loss": 4.5924, - "step": 1975 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005990123013153657, - "loss": 4.6688, - "step": 1980 - }, - { - "epoch": 0.03, - "learning_rate": 0.000599005620097975, - "loss": 4.6841, - "step": 1985 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005989989163968941, - "loss": 4.6268, - "step": 1990 - }, - { - "epoch": 0.03, - "learning_rate": 0.000598992190212627, - "loss": 4.6271, - "step": 1995 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005989854415456796, - "loss": 4.5934, - "step": 2000 - }, - { - "epoch": 0.03, - "eval_loss": 4.622138977050781, - "eval_runtime": 150.27, - "eval_samples_per_second": 12.251, - "eval_steps_per_second": 0.772, - "step": 2000 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005989786703965592, - "loss": 4.6169, - "step": 2005 - }, - { - "epoch": 0.03, - "learning_rate": 0.000598971876765775, - "loss": 4.5989, - "step": 2010 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005989650606538379, - "loss": 4.5947, - "step": 2015 - }, - { - "epoch": 0.03, - "learning_rate": 0.0005989582220612605, - "loss": 4.6156, - "step": 2020 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989513609885568, - "loss": 4.6182, - "step": 2025 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989444774362429, - "loss": 4.6214, - "step": 2030 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989375714048363, - "loss": 4.6239, - "step": 2035 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989306428948565, - "loss": 4.5639, - "step": 2040 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989236919068243, - "loss": 4.5897, - "step": 2045 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989167184412624, - "loss": 4.6397, - "step": 2050 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989097224986952, - "loss": 4.6781, - "step": 2055 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005989027040796488, - "loss": 4.6692, - "step": 2060 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988956631846509, - "loss": 4.5066, - "step": 2065 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988885998142308, - "loss": 4.5364, - "step": 2070 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988815139689199, - "loss": 4.5657, - "step": 2075 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988744056492508, - "loss": 4.4681, - "step": 2080 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988672748557583, - "loss": 4.5448, - "step": 2085 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988601215889782, - "loss": 4.5786, - "step": 2090 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988529458494488, - "loss": 4.5426, - "step": 2095 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988457476377092, - "loss": 4.5494, - "step": 2100 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988385269543011, - "loss": 4.5845, - "step": 2105 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988312837997673, - "loss": 4.5108, - "step": 2110 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988240181746524, - "loss": 4.5646, - "step": 2115 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988167300795029, - "loss": 4.5582, - "step": 2120 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988094195148666, - "loss": 4.5773, - "step": 2125 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005988020864812933, - "loss": 4.5119, - "step": 2130 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987947309793344, - "loss": 4.4035, - "step": 2135 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987873530095432, - "loss": 4.4899, - "step": 2140 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987799525724742, - "loss": 4.56, - "step": 2145 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598772529668684, - "loss": 4.4948, - "step": 2150 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987650842987307, - "loss": 4.4912, - "step": 2155 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987576164631743, - "loss": 4.6292, - "step": 2160 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987501261625762, - "loss": 4.563, - "step": 2165 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987426133974997, - "loss": 4.5965, - "step": 2170 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987350781685097, - "loss": 4.546, - "step": 2175 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987275204761729, - "loss": 4.6035, - "step": 2180 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987199403210574, - "loss": 4.6014, - "step": 2185 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987123377037333, - "loss": 4.5493, - "step": 2190 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005987047126247723, - "loss": 4.5345, - "step": 2195 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986970650847478, - "loss": 4.5793, - "step": 2200 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986893950842347, - "loss": 4.5242, - "step": 2205 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986817026238099, - "loss": 4.5865, - "step": 2210 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986739877040518, - "loss": 4.5695, - "step": 2215 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986662503255405, - "loss": 4.5274, - "step": 2220 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986584904888579, - "loss": 4.5146, - "step": 2225 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986507081945872, - "loss": 4.4585, - "step": 2230 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598642903443314, - "loss": 4.5071, - "step": 2235 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986350762356249, - "loss": 4.6897, - "step": 2240 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986272265721086, - "loss": 4.5492, - "step": 2245 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005986193544533553, - "loss": 4.4015, - "step": 2250 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598611459879957, - "loss": 4.5966, - "step": 2255 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598603542852507, - "loss": 4.5583, - "step": 2260 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985956033716011, - "loss": 4.4743, - "step": 2265 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985876414378361, - "loss": 4.557, - "step": 2270 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985796570518108, - "loss": 4.5134, - "step": 2275 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985716502141254, - "loss": 4.5188, - "step": 2280 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985636209253821, - "loss": 4.5209, - "step": 2285 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985555691861845, - "loss": 4.4911, - "step": 2290 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985474949971383, - "loss": 4.5194, - "step": 2295 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985393983588503, - "loss": 4.5059, - "step": 2300 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985312792719297, - "loss": 4.4679, - "step": 2305 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985231377369869, - "loss": 4.5583, - "step": 2310 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598514973754634, - "loss": 4.5782, - "step": 2315 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005985067873254848, - "loss": 4.5104, - "step": 2320 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984985784501551, - "loss": 4.5072, - "step": 2325 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984903471292621, - "loss": 4.435, - "step": 2330 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984820933634247, - "loss": 4.4558, - "step": 2335 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984738171532636, - "loss": 4.5055, - "step": 2340 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984655184994011, - "loss": 4.521, - "step": 2345 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984571974024614, - "loss": 4.4612, - "step": 2350 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984488538630698, - "loss": 4.4435, - "step": 2355 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984404878818539, - "loss": 4.5881, - "step": 2360 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984320994594429, - "loss": 4.5542, - "step": 2365 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984236885964674, - "loss": 4.3867, - "step": 2370 - }, - { - "epoch": 0.04, - "learning_rate": 0.00059841525529356, - "loss": 4.5786, - "step": 2375 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005984067995513547, - "loss": 4.5679, - "step": 2380 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983983213704874, - "loss": 4.4752, - "step": 2385 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983898207515957, - "loss": 4.4446, - "step": 2390 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983812976953185, - "loss": 4.4343, - "step": 2395 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598372752202297, - "loss": 4.3717, - "step": 2400 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983641842731736, - "loss": 4.4689, - "step": 2405 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983555939085927, - "loss": 4.4949, - "step": 2410 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983469811092002, - "loss": 4.5136, - "step": 2415 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983383458756437, - "loss": 4.4673, - "step": 2420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983296882085726, - "loss": 4.521, - "step": 2425 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983210081086378, - "loss": 4.4922, - "step": 2430 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983123055764921, - "loss": 4.5239, - "step": 2435 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005983035806127897, - "loss": 4.4424, - "step": 2440 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982948332181871, - "loss": 4.4227, - "step": 2445 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982860633933415, - "loss": 4.5284, - "step": 2450 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982772711389128, - "loss": 4.5241, - "step": 2455 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982684564555619, - "loss": 4.4501, - "step": 2460 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982596193439517, - "loss": 4.4509, - "step": 2465 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982507598047466, - "loss": 4.4052, - "step": 2470 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598241877838613, - "loss": 4.4956, - "step": 2475 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982329734462186, - "loss": 4.3242, - "step": 2480 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982240466282329, - "loss": 4.392, - "step": 2485 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982150973853274, - "loss": 4.5077, - "step": 2490 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005982061257181749, - "loss": 4.4919, - "step": 2495 - }, - { - "epoch": 0.04, - "learning_rate": 0.00059819713162745, - "loss": 4.5178, - "step": 2500 - }, - { - "epoch": 0.04, - "eval_loss": 4.496661186218262, - "eval_runtime": 150.774, - "eval_samples_per_second": 12.21, - "eval_steps_per_second": 0.769, - "step": 2500 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981881151138291, - "loss": 4.5157, - "step": 2505 - }, - { - "epoch": 0.04, - "learning_rate": 0.00059817907617799, - "loss": 4.4359, - "step": 2510 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981700148206126, - "loss": 4.4987, - "step": 2515 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981609310423781, - "loss": 4.3829, - "step": 2520 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981518248439697, - "loss": 4.4533, - "step": 2525 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981426962260722, - "loss": 4.4787, - "step": 2530 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981335451893718, - "loss": 4.4843, - "step": 2535 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981243717345567, - "loss": 4.4322, - "step": 2540 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981151758623167, - "loss": 4.504, - "step": 2545 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005981059575733432, - "loss": 4.4743, - "step": 2550 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980967168683297, - "loss": 4.5483, - "step": 2555 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980874537479706, - "loss": 4.4412, - "step": 2560 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980781682129627, - "loss": 4.4774, - "step": 2565 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980688602640042, - "loss": 4.3842, - "step": 2570 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598059529901795, - "loss": 4.453, - "step": 2575 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980501771270366, - "loss": 4.3691, - "step": 2580 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980408019404324, - "loss": 4.4649, - "step": 2585 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980314043426874, - "loss": 4.4571, - "step": 2590 - }, - { - "epoch": 0.04, - "learning_rate": 0.0005980219843345082, - "loss": 4.2869, - "step": 2595 - }, - { - "epoch": 0.04, - "learning_rate": 0.000598012541916603, - "loss": 4.442, - "step": 2600 - }, - { - "epoch": 0.05, - "learning_rate": 0.000598003077089682, - "loss": 4.4842, - "step": 2605 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979935898544568, - "loss": 4.4487, - "step": 2610 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979840802116409, - "loss": 4.4768, - "step": 2615 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979745481619493, - "loss": 4.4794, - "step": 2620 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979649937060988, - "loss": 4.4843, - "step": 2625 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979554168448078, - "loss": 4.4868, - "step": 2630 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979458175787963, - "loss": 4.52, - "step": 2635 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979361959087865, - "loss": 4.5309, - "step": 2640 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979265518355015, - "loss": 4.4523, - "step": 2645 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005979168853596668, - "loss": 4.4153, - "step": 2650 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597907196482009, - "loss": 4.3197, - "step": 2655 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978974852032568, - "loss": 4.4264, - "step": 2660 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978877515241404, - "loss": 4.4645, - "step": 2665 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978779954453917, - "loss": 4.4296, - "step": 2670 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978682169677444, - "loss": 4.4271, - "step": 2675 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978584160919339, - "loss": 4.4266, - "step": 2680 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978485928186968, - "loss": 4.4608, - "step": 2685 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597838747148772, - "loss": 4.455, - "step": 2690 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978288790828998, - "loss": 4.4432, - "step": 2695 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978189886218223, - "loss": 4.4275, - "step": 2700 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005978090757662831, - "loss": 4.4242, - "step": 2705 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977991405170277, - "loss": 4.4197, - "step": 2710 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977891828748031, - "loss": 4.3557, - "step": 2715 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977792028403581, - "loss": 4.4138, - "step": 2720 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977692004144431, - "loss": 4.4321, - "step": 2725 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977591755978104, - "loss": 4.3348, - "step": 2730 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977491283912135, - "loss": 4.3667, - "step": 2735 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977390587954082, - "loss": 4.5263, - "step": 2740 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977289668111515, - "loss": 4.4123, - "step": 2745 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977188524392025, - "loss": 4.4297, - "step": 2750 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005977087156803214, - "loss": 4.4391, - "step": 2755 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976985565352707, - "loss": 4.4199, - "step": 2760 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976883750048142, - "loss": 4.4723, - "step": 2765 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976781710897175, - "loss": 4.3106, - "step": 2770 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597667944790748, - "loss": 4.4756, - "step": 2775 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976576961086745, - "loss": 4.4701, - "step": 2780 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976474250442678, - "loss": 4.316, - "step": 2785 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976371315983, - "loss": 4.3714, - "step": 2790 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976268157715453, - "loss": 4.2413, - "step": 2795 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976164775647795, - "loss": 4.4715, - "step": 2800 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005976061169787798, - "loss": 4.4679, - "step": 2805 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975957340143254, - "loss": 4.3745, - "step": 2810 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975853286721968, - "loss": 4.348, - "step": 2815 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975749009531767, - "loss": 4.4823, - "step": 2820 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975644508580489, - "loss": 4.3, - "step": 2825 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975539783875995, - "loss": 4.3079, - "step": 2830 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597543483542616, - "loss": 4.4583, - "step": 2835 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975329663238872, - "loss": 4.3141, - "step": 2840 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975224267322043, - "loss": 4.4546, - "step": 2845 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975118647683597, - "loss": 4.4115, - "step": 2850 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005975012804331476, - "loss": 4.3319, - "step": 2855 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005974906737273638, - "loss": 4.3583, - "step": 2860 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597480044651806, - "loss": 4.3746, - "step": 2865 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005974693932072734, - "loss": 4.3578, - "step": 2870 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597458719394567, - "loss": 4.4066, - "step": 2875 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005974480232144891, - "loss": 4.3589, - "step": 2880 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005974373046678445, - "loss": 4.4172, - "step": 2885 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005974265637554389, - "loss": 4.4344, - "step": 2890 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005974158004780799, - "loss": 4.3983, - "step": 2895 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597405014836577, - "loss": 4.3429, - "step": 2900 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973942068317411, - "loss": 4.3561, - "step": 2905 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973833764643851, - "loss": 4.3462, - "step": 2910 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973725237353231, - "loss": 4.3672, - "step": 2915 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973616486453715, - "loss": 4.4243, - "step": 2920 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973507511953478, - "loss": 4.4428, - "step": 2925 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973398313860714, - "loss": 4.3471, - "step": 2930 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973288892183636, - "loss": 4.2282, - "step": 2935 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973179246930472, - "loss": 4.3884, - "step": 2940 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005973069378109466, - "loss": 4.3978, - "step": 2945 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972959285728877, - "loss": 4.4103, - "step": 2950 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972848969796988, - "loss": 4.3988, - "step": 2955 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972738430322092, - "loss": 4.4573, - "step": 2960 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972627667312501, - "loss": 4.3979, - "step": 2965 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972516680776543, - "loss": 4.3337, - "step": 2970 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972405470722565, - "loss": 4.4286, - "step": 2975 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597229403715893, - "loss": 4.3805, - "step": 2980 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972182380094015, - "loss": 4.3886, - "step": 2985 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005972070499536217, - "loss": 4.3983, - "step": 2990 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597195839549395, - "loss": 4.4782, - "step": 2995 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005971846067975642, - "loss": 4.3503, - "step": 3000 - }, - { - "epoch": 0.05, - "eval_loss": 4.413268089294434, - "eval_runtime": 150.2737, - "eval_samples_per_second": 12.251, - "eval_steps_per_second": 0.772, - "step": 3000 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597173351698974, - "loss": 4.4155, - "step": 3005 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005971620742544709, - "loss": 4.4261, - "step": 3010 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005971507744649025, - "loss": 4.348, - "step": 3015 - }, - { - "epoch": 0.05, - "learning_rate": 0.000597139452331119, - "loss": 4.4353, - "step": 3020 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005971281078539712, - "loss": 4.3666, - "step": 3025 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005971167410343127, - "loss": 4.4247, - "step": 3030 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005971053518729979, - "loss": 4.4093, - "step": 3035 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970939403708832, - "loss": 4.4617, - "step": 3040 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970825065288269, - "loss": 4.3769, - "step": 3045 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970710503476885, - "loss": 4.3765, - "step": 3050 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970595718283296, - "loss": 4.4443, - "step": 3055 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970480709716134, - "loss": 4.4562, - "step": 3060 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970365477784046, - "loss": 4.4314, - "step": 3065 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970250022495696, - "loss": 4.3781, - "step": 3070 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970134343859767, - "loss": 4.2969, - "step": 3075 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005970018441884957, - "loss": 4.3114, - "step": 3080 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969902316579982, - "loss": 4.4437, - "step": 3085 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969785967953572, - "loss": 4.4663, - "step": 3090 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969669396014479, - "loss": 4.397, - "step": 3095 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969552600771465, - "loss": 4.3915, - "step": 3100 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969435582233314, - "loss": 4.3023, - "step": 3105 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969318340408827, - "loss": 4.2951, - "step": 3110 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969200875306817, - "loss": 4.4636, - "step": 3115 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005969083186936118, - "loss": 4.3918, - "step": 3120 - }, - { - "epoch": 0.05, - "learning_rate": 0.000596896527530558, - "loss": 4.3059, - "step": 3125 - }, - { - "epoch": 0.05, - "learning_rate": 0.000596884714042407, - "loss": 4.2898, - "step": 3130 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005968728782300469, - "loss": 4.3249, - "step": 3135 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005968610200943678, - "loss": 4.3538, - "step": 3140 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005968491396362614, - "loss": 4.2482, - "step": 3145 - }, - { - "epoch": 0.05, - "learning_rate": 0.000596837236856621, - "loss": 4.3515, - "step": 3150 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005968253117563417, - "loss": 4.4214, - "step": 3155 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005968133643363202, - "loss": 4.3378, - "step": 3160 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005968013945974549, - "loss": 4.3318, - "step": 3165 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005967894025406456, - "loss": 4.2822, - "step": 3170 - }, - { - "epoch": 0.05, - "learning_rate": 0.0005967773881667944, - "loss": 4.2495, - "step": 3175 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005967653514768046, - "loss": 4.268, - "step": 3180 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005967532924715811, - "loss": 4.3754, - "step": 3185 - }, - { - "epoch": 0.06, - "learning_rate": 0.000596741211152031, - "loss": 4.4025, - "step": 3190 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005967291075190625, - "loss": 4.4083, - "step": 3195 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005967169815735858, - "loss": 4.3213, - "step": 3200 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005967048333165128, - "loss": 4.3461, - "step": 3205 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966926627487569, - "loss": 4.3768, - "step": 3210 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966804698712333, - "loss": 4.4196, - "step": 3215 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966682546848589, - "loss": 4.5157, - "step": 3220 - }, - { - "epoch": 0.06, - "learning_rate": 0.000596656017190552, - "loss": 4.5326, - "step": 3225 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966437573892331, - "loss": 4.4102, - "step": 3230 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966314752818239, - "loss": 4.405, - "step": 3235 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966191708692478, - "loss": 4.4479, - "step": 3240 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005966068441524304, - "loss": 4.2742, - "step": 3245 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965944951322984, - "loss": 4.4105, - "step": 3250 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965821238097803, - "loss": 4.3222, - "step": 3255 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965697301858064, - "loss": 4.3689, - "step": 3260 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965573142613088, - "loss": 4.3729, - "step": 3265 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965448760372209, - "loss": 4.3759, - "step": 3270 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965324155144782, - "loss": 4.3199, - "step": 3275 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965199326940174, - "loss": 4.352, - "step": 3280 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005965074275767774, - "loss": 4.3888, - "step": 3285 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964949001636985, - "loss": 4.2492, - "step": 3290 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964823504557226, - "loss": 4.2244, - "step": 3295 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964697784537933, - "loss": 4.2883, - "step": 3300 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964571841588561, - "loss": 4.371, - "step": 3305 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964445675718579, - "loss": 4.334, - "step": 3310 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964319286937475, - "loss": 4.3165, - "step": 3315 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964192675254753, - "loss": 4.2529, - "step": 3320 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005964065840679933, - "loss": 4.3381, - "step": 3325 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963938783222553, - "loss": 4.283, - "step": 3330 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963811502892165, - "loss": 4.3161, - "step": 3335 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963683999698342, - "loss": 4.3748, - "step": 3340 - }, - { - "epoch": 0.06, - "learning_rate": 0.000596355627365067, - "loss": 4.3108, - "step": 3345 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963428324758755, - "loss": 4.3598, - "step": 3350 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963300153032217, - "loss": 4.3048, - "step": 3355 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963171758480695, - "loss": 4.3148, - "step": 3360 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005963043141113842, - "loss": 4.1902, - "step": 3365 - }, - { - "epoch": 0.06, - "learning_rate": 0.000596291430094133, - "loss": 4.2621, - "step": 3370 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005962785237972848, - "loss": 4.3392, - "step": 3375 - }, - { - "epoch": 0.06, - "learning_rate": 0.00059626559522181, - "loss": 4.4537, - "step": 3380 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005962526443686807, - "loss": 4.3562, - "step": 3385 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005962396712388709, - "loss": 4.3759, - "step": 3390 - }, - { - "epoch": 0.06, - "learning_rate": 0.000596226675833356, - "loss": 4.3024, - "step": 3395 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005962136581531132, - "loss": 4.3345, - "step": 3400 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005962006181991214, - "loss": 4.3228, - "step": 3405 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961875559723611, - "loss": 4.235, - "step": 3410 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961744714738146, - "loss": 4.2656, - "step": 3415 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961613647044656, - "loss": 4.404, - "step": 3420 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961482356652998, - "loss": 4.2853, - "step": 3425 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961350843573044, - "loss": 4.294, - "step": 3430 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961219107814684, - "loss": 4.2044, - "step": 3435 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005961087149387824, - "loss": 4.2688, - "step": 3440 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960954968302383, - "loss": 4.2032, - "step": 3445 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960822564568305, - "loss": 4.3575, - "step": 3450 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960689938195544, - "loss": 4.368, - "step": 3455 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960557089194072, - "loss": 4.2639, - "step": 3460 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960424017573881, - "loss": 4.3362, - "step": 3465 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960290723344974, - "loss": 4.2979, - "step": 3470 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960157206517376, - "loss": 4.3052, - "step": 3475 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005960023467101128, - "loss": 4.3042, - "step": 3480 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959889505106284, - "loss": 4.2859, - "step": 3485 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959755320542919, - "loss": 4.3849, - "step": 3490 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959620913421123, - "loss": 4.3201, - "step": 3495 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959486283751001, - "loss": 4.41, - "step": 3500 - }, - { - "epoch": 0.06, - "eval_loss": 4.344966411590576, - "eval_runtime": 150.7763, - "eval_samples_per_second": 12.21, - "eval_steps_per_second": 0.769, - "step": 3500 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959351431542678, - "loss": 4.3984, - "step": 3505 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959216356806294, - "loss": 4.3778, - "step": 3510 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005959081059552005, - "loss": 4.3246, - "step": 3515 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958945539789987, - "loss": 4.2603, - "step": 3520 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958809797530427, - "loss": 4.2545, - "step": 3525 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958673832783534, - "loss": 4.268, - "step": 3530 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958537645559533, - "loss": 4.2577, - "step": 3535 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958401235868663, - "loss": 4.3378, - "step": 3540 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958264603721181, - "loss": 4.3697, - "step": 3545 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005958127749127361, - "loss": 4.4162, - "step": 3550 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957990672097495, - "loss": 4.351, - "step": 3555 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957853372641891, - "loss": 4.3266, - "step": 3560 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957715850770871, - "loss": 4.2015, - "step": 3565 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957578106494778, - "loss": 4.1827, - "step": 3570 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957440139823968, - "loss": 4.4158, - "step": 3575 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957301950768816, - "loss": 4.2008, - "step": 3580 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957163539339714, - "loss": 4.1551, - "step": 3585 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005957024905547068, - "loss": 4.2472, - "step": 3590 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005956886049401305, - "loss": 4.2492, - "step": 3595 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005956746970912863, - "loss": 4.2584, - "step": 3600 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005956607670092204, - "loss": 4.3039, - "step": 3605 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005956468146949799, - "loss": 4.2622, - "step": 3610 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005956328401496141, - "loss": 4.2605, - "step": 3615 - }, - { - "epoch": 0.06, - "learning_rate": 0.000595618843374174, - "loss": 4.2973, - "step": 3620 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005956048243697117, - "loss": 4.218, - "step": 3625 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005955907831372817, - "loss": 4.3062, - "step": 3630 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005955767196779395, - "loss": 4.3318, - "step": 3635 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005955626339927431, - "loss": 4.4257, - "step": 3640 - }, - { - "epoch": 0.06, - "learning_rate": 0.000595548526082751, - "loss": 4.1505, - "step": 3645 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005955343959490246, - "loss": 4.2879, - "step": 3650 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005955202435926262, - "loss": 4.2549, - "step": 3655 - }, - { - "epoch": 0.06, - "learning_rate": 0.00059550606901462, - "loss": 4.2972, - "step": 3660 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954918722160718, - "loss": 4.253, - "step": 3665 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954776531980492, - "loss": 4.3401, - "step": 3670 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954634119616215, - "loss": 4.2288, - "step": 3675 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954491485078592, - "loss": 4.2791, - "step": 3680 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954348628378353, - "loss": 4.2321, - "step": 3685 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954205549526236, - "loss": 4.2922, - "step": 3690 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005954062248533003, - "loss": 4.2489, - "step": 3695 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005953918725409429, - "loss": 4.3511, - "step": 3700 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005953774980166304, - "loss": 4.3664, - "step": 3705 - }, - { - "epoch": 0.06, - "learning_rate": 0.000595363101281444, - "loss": 4.2423, - "step": 3710 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005953486823364659, - "loss": 4.2646, - "step": 3715 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005953342411827807, - "loss": 4.2715, - "step": 3720 - }, - { - "epoch": 0.06, - "learning_rate": 0.000595319777821474, - "loss": 4.3321, - "step": 3725 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005953052922536335, - "loss": 4.2929, - "step": 3730 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005952907844803486, - "loss": 4.2246, - "step": 3735 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005952762545027099, - "loss": 4.2096, - "step": 3740 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005952617023218101, - "loss": 4.312, - "step": 3745 - }, - { - "epoch": 0.06, - "learning_rate": 0.0005952471279387435, - "loss": 4.2608, - "step": 3750 - }, - { - "epoch": 0.06, - "learning_rate": 0.000595232531354606, - "loss": 4.3276, - "step": 3755 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005952179125704952, - "loss": 4.335, - "step": 3760 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005952032715875105, - "loss": 4.2907, - "step": 3765 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005951886084067525, - "loss": 4.3741, - "step": 3770 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005951739230293239, - "loss": 4.3517, - "step": 3775 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005951592154563292, - "loss": 4.277, - "step": 3780 - }, - { - "epoch": 0.07, - "learning_rate": 0.000595144485688874, - "loss": 4.3368, - "step": 3785 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005951297337280662, - "loss": 4.2605, - "step": 3790 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005951149595750149, - "loss": 4.3184, - "step": 3795 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005951001632308311, - "loss": 4.3056, - "step": 3800 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005950853446966273, - "loss": 4.3516, - "step": 3805 - }, - { - "epoch": 0.07, - "learning_rate": 0.000595070503973518, - "loss": 4.3533, - "step": 3810 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005950556410626189, - "loss": 4.2064, - "step": 3815 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005950407559650477, - "loss": 4.2559, - "step": 3820 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005950258486819239, - "loss": 4.347, - "step": 3825 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005950109192143681, - "loss": 4.2478, - "step": 3830 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949959675635031, - "loss": 4.3066, - "step": 3835 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949809937304532, - "loss": 4.1078, - "step": 3840 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949659977163444, - "loss": 4.3285, - "step": 3845 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949509795223042, - "loss": 4.211, - "step": 3850 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949359391494619, - "loss": 4.2925, - "step": 3855 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949208765989485, - "loss": 4.2087, - "step": 3860 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005949057918718966, - "loss": 4.3738, - "step": 3865 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005948906849694406, - "loss": 4.234, - "step": 3870 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005948755558927163, - "loss": 4.2358, - "step": 3875 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005948604046428614, - "loss": 4.2694, - "step": 3880 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005948452312210153, - "loss": 4.2782, - "step": 3885 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005948300356283188, - "loss": 4.3455, - "step": 3890 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005948148178659147, - "loss": 4.2975, - "step": 3895 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005947995779349471, - "loss": 4.3128, - "step": 3900 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005947843158365621, - "loss": 4.2444, - "step": 3905 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005947690315719073, - "loss": 4.2566, - "step": 3910 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005947537251421318, - "loss": 4.2086, - "step": 3915 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594738396548387, - "loss": 4.2687, - "step": 3920 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005947230457918253, - "loss": 4.2127, - "step": 3925 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005947076728736009, - "loss": 4.3346, - "step": 3930 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005946922777948699, - "loss": 4.2569, - "step": 3935 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005946768605567898, - "loss": 4.327, - "step": 3940 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005946614211605201, - "loss": 4.3121, - "step": 3945 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005946459596072216, - "loss": 4.1934, - "step": 3950 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005946304758980569, - "loss": 4.0914, - "step": 3955 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005946149700341904, - "loss": 4.284, - "step": 3960 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594599442016788, - "loss": 4.2219, - "step": 3965 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005945838918470174, - "loss": 4.2636, - "step": 3970 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005945683195260478, - "loss": 4.2416, - "step": 3975 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005945527250550503, - "loss": 4.1594, - "step": 3980 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005945371084351975, - "loss": 4.2529, - "step": 3985 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005945214696676635, - "loss": 4.2743, - "step": 3990 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005945058087536243, - "loss": 4.1218, - "step": 3995 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005944901256942578, - "loss": 4.2187, - "step": 4000 - }, - { - "epoch": 0.07, - "eval_loss": 4.265633583068848, - "eval_runtime": 150.1723, - "eval_samples_per_second": 12.259, - "eval_steps_per_second": 0.772, - "step": 4000 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594474420490743, - "loss": 4.2666, - "step": 4005 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005944586931442609, - "loss": 4.12, - "step": 4010 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005944429436559942, - "loss": 4.2916, - "step": 4015 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005944271720271272, - "loss": 4.1823, - "step": 4020 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005944113782588456, - "loss": 4.2687, - "step": 4025 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005943955623523373, - "loss": 4.2593, - "step": 4030 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005943797243087915, - "loss": 4.2273, - "step": 4035 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594363864129399, - "loss": 4.2475, - "step": 4040 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005943479818153526, - "loss": 4.2453, - "step": 4045 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005943320773678463, - "loss": 4.2077, - "step": 4050 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005943161507880765, - "loss": 4.211, - "step": 4055 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005943002020772403, - "loss": 4.3064, - "step": 4060 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005942842312365372, - "loss": 4.2258, - "step": 4065 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594268238267168, - "loss": 4.1974, - "step": 4070 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005942522231703357, - "loss": 4.3158, - "step": 4075 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594236185947244, - "loss": 4.2207, - "step": 4080 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005942201265990991, - "loss": 4.2394, - "step": 4085 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005942040451271085, - "loss": 4.2613, - "step": 4090 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005941879415324815, - "loss": 4.2127, - "step": 4095 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594171815816429, - "loss": 4.2353, - "step": 4100 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005941556679801636, - "loss": 4.2169, - "step": 4105 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005941394980248994, - "loss": 4.1401, - "step": 4110 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005941233059518524, - "loss": 4.2061, - "step": 4115 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005941070917622401, - "loss": 4.1151, - "step": 4120 - }, - { - "epoch": 0.07, - "learning_rate": 0.000594090855457282, - "loss": 4.1882, - "step": 4125 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005940745970381985, - "loss": 4.1692, - "step": 4130 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005940583165062126, - "loss": 4.2102, - "step": 4135 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005940420138625482, - "loss": 4.1483, - "step": 4140 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005940256891084313, - "loss": 4.2406, - "step": 4145 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005940093422450895, - "loss": 4.2565, - "step": 4150 - }, - { - "epoch": 0.07, - "learning_rate": 0.000593992973273752, - "loss": 4.316, - "step": 4155 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005939765821956495, - "loss": 4.27, - "step": 4160 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005939601690120146, - "loss": 4.2913, - "step": 4165 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005939437337240816, - "loss": 4.1995, - "step": 4170 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005939272763330862, - "loss": 4.1578, - "step": 4175 - }, - { - "epoch": 0.07, - "learning_rate": 0.000593910796840266, - "loss": 4.2062, - "step": 4180 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005938942952468601, - "loss": 4.2138, - "step": 4185 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005938777715541095, - "loss": 4.1583, - "step": 4190 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005938612257632565, - "loss": 4.1554, - "step": 4195 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005938446578755453, - "loss": 4.1384, - "step": 4200 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005938280678922218, - "loss": 4.1932, - "step": 4205 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005938114558145334, - "loss": 4.262, - "step": 4210 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005937948216437294, - "loss": 4.3102, - "step": 4215 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005937781653810603, - "loss": 4.1807, - "step": 4220 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005937614870277788, - "loss": 4.1832, - "step": 4225 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005937447865851391, - "loss": 4.2501, - "step": 4230 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005937280640543968, - "loss": 4.2523, - "step": 4235 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005937113194368095, - "loss": 4.1573, - "step": 4240 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005936945527336362, - "loss": 4.1715, - "step": 4245 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005936777639461375, - "loss": 4.2046, - "step": 4250 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005936609530755761, - "loss": 4.2866, - "step": 4255 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005936441201232162, - "loss": 4.1373, - "step": 4260 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005936272650903232, - "loss": 4.2981, - "step": 4265 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005936103879781647, - "loss": 4.1283, - "step": 4270 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005935934887880099, - "loss": 4.2305, - "step": 4275 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005935765675211292, - "loss": 4.1224, - "step": 4280 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005935596241787953, - "loss": 4.2186, - "step": 4285 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005935426587622821, - "loss": 4.2636, - "step": 4290 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005935256712728653, - "loss": 4.1941, - "step": 4295 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005935086617118224, - "loss": 4.1388, - "step": 4300 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005934916300804322, - "loss": 4.1762, - "step": 4305 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005934745763799756, - "loss": 4.2763, - "step": 4310 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005934575006117349, - "loss": 4.2378, - "step": 4315 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005934404027769941, - "loss": 4.2348, - "step": 4320 - }, - { - "epoch": 0.07, - "learning_rate": 0.000593423282877039, - "loss": 4.203, - "step": 4325 - }, - { - "epoch": 0.07, - "learning_rate": 0.0005934061409131567, - "loss": 4.2091, - "step": 4330 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005933889768866362, - "loss": 4.1984, - "step": 4335 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005933717907987682, - "loss": 4.2025, - "step": 4340 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005933545826508453, - "loss": 4.2632, - "step": 4345 - }, - { - "epoch": 0.08, - "learning_rate": 0.000593337352444161, - "loss": 4.2896, - "step": 4350 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005933201001800111, - "loss": 4.2299, - "step": 4355 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005933028258596931, - "loss": 4.1947, - "step": 4360 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005932855294845055, - "loss": 4.176, - "step": 4365 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005932682110557494, - "loss": 4.087, - "step": 4370 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005932508705747266, - "loss": 4.1089, - "step": 4375 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005932335080427413, - "loss": 4.1443, - "step": 4380 - }, - { - "epoch": 0.08, - "learning_rate": 0.000593216123461099, - "loss": 4.2938, - "step": 4385 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005931987168311069, - "loss": 4.2061, - "step": 4390 - }, - { - "epoch": 0.08, - "learning_rate": 0.000593181288154074, - "loss": 4.1735, - "step": 4395 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005931638374313107, - "loss": 4.248, - "step": 4400 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005931463646641292, - "loss": 4.1558, - "step": 4405 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005931288698538434, - "loss": 4.2654, - "step": 4410 - }, - { - "epoch": 0.08, - "learning_rate": 0.000593111353001769, - "loss": 4.0433, - "step": 4415 - }, - { - "epoch": 0.08, - "learning_rate": 0.000593093814109223, - "loss": 4.217, - "step": 4420 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005930762531775242, - "loss": 4.1692, - "step": 4425 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005930586702079931, - "loss": 4.0659, - "step": 4430 - }, - { - "epoch": 0.08, - "learning_rate": 0.000593041065201952, - "loss": 4.219, - "step": 4435 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005930234381607246, - "loss": 4.235, - "step": 4440 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005930057890856364, - "loss": 4.1474, - "step": 4445 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005929881179780144, - "loss": 4.2813, - "step": 4450 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005929704248391875, - "loss": 4.1081, - "step": 4455 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005929527096704862, - "loss": 4.1982, - "step": 4460 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005929349724732424, - "loss": 4.2255, - "step": 4465 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005929172132487899, - "loss": 4.1753, - "step": 4470 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005928994319984643, - "loss": 4.2111, - "step": 4475 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005928816287236023, - "loss": 4.3088, - "step": 4480 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005928638034255429, - "loss": 4.3208, - "step": 4485 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005928459561056264, - "loss": 4.2369, - "step": 4490 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005928280867651947, - "loss": 4.252, - "step": 4495 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005928101954055918, - "loss": 4.1711, - "step": 4500 - }, - { - "epoch": 0.08, - "eval_loss": 4.222765922546387, - "eval_runtime": 150.6785, - "eval_samples_per_second": 12.218, - "eval_steps_per_second": 0.77, - "step": 4500 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005927922820281627, - "loss": 4.2227, - "step": 4505 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005927743466342545, - "loss": 4.1118, - "step": 4510 - }, - { - "epoch": 0.08, - "learning_rate": 0.000592756389225216, - "loss": 4.1699, - "step": 4515 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005927384098023973, - "loss": 4.1542, - "step": 4520 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005927204083671505, - "loss": 4.1589, - "step": 4525 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005927023849208291, - "loss": 4.1576, - "step": 4530 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005926843394647884, - "loss": 4.1702, - "step": 4535 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005926662720003855, - "loss": 4.1987, - "step": 4540 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005926481825289788, - "loss": 4.1527, - "step": 4545 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005926300710519285, - "loss": 4.147, - "step": 4550 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005926119375705965, - "loss": 4.0946, - "step": 4555 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005925937820863465, - "loss": 4.2065, - "step": 4560 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005925756046005436, - "loss": 4.2049, - "step": 4565 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005925574051145546, - "loss": 4.1496, - "step": 4570 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005925391836297481, - "loss": 4.0682, - "step": 4575 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005925209401474943, - "loss": 4.1447, - "step": 4580 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005925026746691649, - "loss": 4.1023, - "step": 4585 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005924843871961334, - "loss": 4.207, - "step": 4590 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005924660777297749, - "loss": 4.1938, - "step": 4595 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005924477462714664, - "loss": 4.1987, - "step": 4600 - }, - { - "epoch": 0.08, - "learning_rate": 0.000592429392822586, - "loss": 4.2236, - "step": 4605 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005924110173845139, - "loss": 4.2157, - "step": 4610 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005923926199586319, - "loss": 4.0652, - "step": 4615 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005923742005463235, - "loss": 4.1559, - "step": 4620 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005923557591489735, - "loss": 4.1209, - "step": 4625 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005923372957679688, - "loss": 4.0978, - "step": 4630 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005923188104046975, - "loss": 4.1673, - "step": 4635 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005923003030605499, - "loss": 4.1935, - "step": 4640 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005922817737369175, - "loss": 4.0448, - "step": 4645 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005922632224351936, - "loss": 3.9366, - "step": 4650 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005922446491567733, - "loss": 4.1289, - "step": 4655 - }, - { - "epoch": 0.08, - "learning_rate": 0.000592226053903053, - "loss": 4.1816, - "step": 4660 - }, - { - "epoch": 0.08, - "learning_rate": 0.000592207436675431, - "loss": 4.1968, - "step": 4665 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005921887974753075, - "loss": 4.2072, - "step": 4670 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005921701363040837, - "loss": 4.3708, - "step": 4675 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005921514531631631, - "loss": 4.1242, - "step": 4680 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005921327480539505, - "loss": 4.2098, - "step": 4685 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005921140209778524, - "loss": 4.0701, - "step": 4690 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005920952719362769, - "loss": 4.2459, - "step": 4695 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005920765009306341, - "loss": 4.1828, - "step": 4700 - }, - { - "epoch": 0.08, - "learning_rate": 0.000592057707962335, - "loss": 4.0961, - "step": 4705 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005920388930327932, - "loss": 4.1396, - "step": 4710 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005920200561434234, - "loss": 4.191, - "step": 4715 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005920011972956418, - "loss": 4.1557, - "step": 4720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005919823164908667, - "loss": 4.1885, - "step": 4725 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005919634137305179, - "loss": 4.0797, - "step": 4730 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005919444890160165, - "loss": 4.1649, - "step": 4735 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005919255423487859, - "loss": 4.0826, - "step": 4740 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005919065737302505, - "loss": 4.1142, - "step": 4745 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005918875831618367, - "loss": 4.1676, - "step": 4750 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005918685706449726, - "loss": 4.0972, - "step": 4755 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005918495361810878, - "loss": 4.1348, - "step": 4760 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005918304797716136, - "loss": 4.1432, - "step": 4765 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005918114014179829, - "loss": 4.2454, - "step": 4770 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005917923011216304, - "loss": 4.1003, - "step": 4775 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005917731788839921, - "loss": 4.1819, - "step": 4780 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005917540347065062, - "loss": 4.188, - "step": 4785 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005917348685906121, - "loss": 4.1215, - "step": 4790 - }, - { - "epoch": 0.08, - "learning_rate": 0.000591715680537751, - "loss": 4.1963, - "step": 4795 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005916964705493656, - "loss": 4.1186, - "step": 4800 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005916772386269007, - "loss": 4.1362, - "step": 4805 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005916579847718022, - "loss": 4.2354, - "step": 4810 - }, - { - "epoch": 0.08, - "learning_rate": 0.000591638708985518, - "loss": 4.1049, - "step": 4815 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005916194112694976, - "loss": 4.1688, - "step": 4820 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005916000916251918, - "loss": 4.0749, - "step": 4825 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005915807500540537, - "loss": 4.1612, - "step": 4830 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005915613865575374, - "loss": 4.1247, - "step": 4835 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005915420011370992, - "loss": 4.2301, - "step": 4840 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005915225937941965, - "loss": 4.1453, - "step": 4845 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005915031645302889, - "loss": 4.2323, - "step": 4850 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005914837133468372, - "loss": 4.2572, - "step": 4855 - }, - { - "epoch": 0.08, - "learning_rate": 0.000591464240245304, - "loss": 4.1619, - "step": 4860 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005914447452271537, - "loss": 4.1835, - "step": 4865 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005914252282938524, - "loss": 4.1756, - "step": 4870 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005914056894468671, - "loss": 4.2158, - "step": 4875 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005913861286876677, - "loss": 4.1847, - "step": 4880 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005913665460177246, - "loss": 4.1548, - "step": 4885 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005913469414385105, - "loss": 4.0973, - "step": 4890 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005913273149514996, - "loss": 4.1362, - "step": 4895 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005913076665581675, - "loss": 4.1658, - "step": 4900 - }, - { - "epoch": 0.08, - "learning_rate": 0.000591287996259992, - "loss": 4.1478, - "step": 4905 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005912683040584519, - "loss": 4.1785, - "step": 4910 - }, - { - "epoch": 0.09, - "learning_rate": 0.000591248589955028, - "loss": 4.2769, - "step": 4915 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005912288539512029, - "loss": 4.1097, - "step": 4920 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005912090960484603, - "loss": 4.1856, - "step": 4925 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005911893162482862, - "loss": 4.2468, - "step": 4930 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005911695145521679, - "loss": 4.2657, - "step": 4935 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005911496909615943, - "loss": 4.0184, - "step": 4940 - }, - { - "epoch": 0.09, - "learning_rate": 0.000591129845478056, - "loss": 4.0345, - "step": 4945 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005911099781030454, - "loss": 4.0501, - "step": 4950 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005910900888380564, - "loss": 4.1289, - "step": 4955 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005910701776845845, - "loss": 4.1621, - "step": 4960 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005910502446441269, - "loss": 4.0817, - "step": 4965 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005910302897181826, - "loss": 4.1124, - "step": 4970 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005910103129082519, - "loss": 4.1758, - "step": 4975 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005909903142158372, - "loss": 4.0607, - "step": 4980 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005909702936424421, - "loss": 4.1467, - "step": 4985 - }, - { - "epoch": 0.09, - "learning_rate": 0.000590950251189572, - "loss": 4.2868, - "step": 4990 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005909301868587343, - "loss": 4.1972, - "step": 4995 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005909101006514374, - "loss": 4.1593, - "step": 5000 - }, - { - "epoch": 0.09, - "eval_loss": 4.196778774261475, - "eval_runtime": 150.8836, - "eval_samples_per_second": 12.201, - "eval_steps_per_second": 0.769, - "step": 5000 - }, - { - "epoch": 0.09, - "learning_rate": 0.000590889992569192, - "loss": 4.1782, - "step": 5005 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005908698626135098, - "loss": 4.1774, - "step": 5010 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005908497107859046, - "loss": 4.1228, - "step": 5015 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005908295370878918, - "loss": 4.1645, - "step": 5020 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005908093415209883, - "loss": 4.1598, - "step": 5025 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005907891240867127, - "loss": 4.033, - "step": 5030 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005907688847865851, - "loss": 4.1476, - "step": 5035 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005907486236221278, - "loss": 4.0968, - "step": 5040 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005907283405948638, - "loss": 4.273, - "step": 5045 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005907080357063187, - "loss": 4.1311, - "step": 5050 - }, - { - "epoch": 0.09, - "learning_rate": 0.000590687708958019, - "loss": 3.9782, - "step": 5055 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005906673603514936, - "loss": 4.1972, - "step": 5060 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005906469898882721, - "loss": 4.0807, - "step": 5065 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005906265975698867, - "loss": 4.0946, - "step": 5070 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005906061833978705, - "loss": 4.1808, - "step": 5075 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005905857473737586, - "loss": 4.0355, - "step": 5080 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005905652894990878, - "loss": 4.2127, - "step": 5085 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005905448097753963, - "loss": 4.0982, - "step": 5090 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005905243082042242, - "loss": 4.1078, - "step": 5095 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005905037847871129, - "loss": 4.1049, - "step": 5100 - }, - { - "epoch": 0.09, - "learning_rate": 0.000590483239525606, - "loss": 4.1008, - "step": 5105 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005904626724212481, - "loss": 4.0823, - "step": 5110 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005904420834755858, - "loss": 4.0588, - "step": 5115 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005904214726901672, - "loss": 4.1389, - "step": 5120 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005904008400665424, - "loss": 4.1278, - "step": 5125 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005903801856062627, - "loss": 4.1523, - "step": 5130 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005903595093108812, - "loss": 4.23, - "step": 5135 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005903388111819526, - "loss": 4.046, - "step": 5140 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005903180912210335, - "loss": 4.0819, - "step": 5145 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005902973494296816, - "loss": 4.1195, - "step": 5150 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005902765858094569, - "loss": 4.0833, - "step": 5155 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005902558003619205, - "loss": 4.2782, - "step": 5160 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005902349930886357, - "loss": 4.1168, - "step": 5165 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005902141639911666, - "loss": 4.1654, - "step": 5170 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005901933130710797, - "loss": 4.0838, - "step": 5175 - }, - { - "epoch": 0.09, - "learning_rate": 0.000590172440329943, - "loss": 4.0421, - "step": 5180 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005901515457693258, - "loss": 4.1311, - "step": 5185 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005901306293907995, - "loss": 4.0227, - "step": 5190 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005901096911959365, - "loss": 4.2226, - "step": 5195 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005900887311863118, - "loss": 4.1306, - "step": 5200 - }, - { - "epoch": 0.09, - "learning_rate": 0.000590067749363501, - "loss": 4.0604, - "step": 5205 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005900467457290821, - "loss": 4.0125, - "step": 5210 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005900257202846344, - "loss": 4.1222, - "step": 5215 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005900046730317388, - "loss": 4.1159, - "step": 5220 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005899836039719782, - "loss": 4.1893, - "step": 5225 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005899625131069367, - "loss": 4.1073, - "step": 5230 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005899414004382001, - "loss": 4.0826, - "step": 5235 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005899202659673563, - "loss": 4.0734, - "step": 5240 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005898991096959941, - "loss": 4.1146, - "step": 5245 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005898779316257047, - "loss": 4.0675, - "step": 5250 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005898567317580804, - "loss": 4.0328, - "step": 5255 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005898355100947154, - "loss": 4.0624, - "step": 5260 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005898142666372055, - "loss": 4.0561, - "step": 5265 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005897930013871478, - "loss": 4.0266, - "step": 5270 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005897717143461418, - "loss": 3.9118, - "step": 5275 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005897504055157878, - "loss": 4.1576, - "step": 5280 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005897290748976883, - "loss": 4.0451, - "step": 5285 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005897077224934471, - "loss": 4.2013, - "step": 5290 - }, - { - "epoch": 0.09, - "learning_rate": 0.00058968634830467, - "loss": 4.1097, - "step": 5295 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005896649523329642, - "loss": 4.1492, - "step": 5300 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005896435345799383, - "loss": 4.0192, - "step": 5305 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005896220950472031, - "loss": 4.1101, - "step": 5310 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005896006337363707, - "loss": 4.1892, - "step": 5315 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005895791506490548, - "loss": 4.1187, - "step": 5320 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005895576457868708, - "loss": 4.0377, - "step": 5325 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005895361191514358, - "loss": 4.0711, - "step": 5330 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005895145707443684, - "loss": 4.0841, - "step": 5335 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005894930005672892, - "loss": 4.0504, - "step": 5340 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005894714086218199, - "loss": 4.1208, - "step": 5345 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005894497949095841, - "loss": 4.1813, - "step": 5350 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005894281594322073, - "loss": 4.1122, - "step": 5355 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005894065021913161, - "loss": 4.0417, - "step": 5360 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005893848231885391, - "loss": 4.0673, - "step": 5365 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005893631224255066, - "loss": 3.9842, - "step": 5370 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005893413999038501, - "loss": 4.0419, - "step": 5375 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005893196556252034, - "loss": 4.0908, - "step": 5380 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005892978895912011, - "loss": 4.1134, - "step": 5385 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005892761018034803, - "loss": 4.0354, - "step": 5390 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005892542922636791, - "loss": 4.1377, - "step": 5395 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005892324609734373, - "loss": 4.1254, - "step": 5400 - }, - { - "epoch": 0.09, - "learning_rate": 0.000589210607934397, - "loss": 4.0462, - "step": 5405 - }, - { - "epoch": 0.09, - "learning_rate": 0.000589188733148201, - "loss": 4.1928, - "step": 5410 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005891668366164945, - "loss": 4.109, - "step": 5415 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005891449183409238, - "loss": 3.9687, - "step": 5420 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005891229783231371, - "loss": 4.083, - "step": 5425 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005891010165647841, - "loss": 4.0274, - "step": 5430 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005890790330675162, - "loss": 4.0976, - "step": 5435 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005890570278329867, - "loss": 4.0589, - "step": 5440 - }, - { - "epoch": 0.09, - "learning_rate": 0.00058903500086285, - "loss": 4.1165, - "step": 5445 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005890129521587625, - "loss": 4.1122, - "step": 5450 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005889908817223822, - "loss": 4.094, - "step": 5455 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005889687895553687, - "loss": 4.0328, - "step": 5460 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005889466756593831, - "loss": 4.0535, - "step": 5465 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005889245400360883, - "loss": 4.1173, - "step": 5470 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005889023826871489, - "loss": 4.1297, - "step": 5475 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005888802036142308, - "loss": 4.0227, - "step": 5480 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005888580028190019, - "loss": 4.1865, - "step": 5485 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005888357803031317, - "loss": 4.0646, - "step": 5490 - }, - { - "epoch": 0.1, - "learning_rate": 0.000588813536068291, - "loss": 4.0189, - "step": 5495 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005887912701161524, - "loss": 4.0756, - "step": 5500 - }, - { - "epoch": 0.1, - "eval_loss": 4.124629974365234, - "eval_runtime": 150.3809, - "eval_samples_per_second": 12.242, - "eval_steps_per_second": 0.771, - "step": 5500 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005887689824483905, - "loss": 4.0788, - "step": 5505 - }, - { - "epoch": 0.1, - "learning_rate": 0.000588746673066681, - "loss": 4.0131, - "step": 5510 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005887243419727013, - "loss": 4.0943, - "step": 5515 - }, - { - "epoch": 0.1, - "learning_rate": 0.000588701989168131, - "loss": 4.1031, - "step": 5520 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005886796146546507, - "loss": 4.0249, - "step": 5525 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005886572184339427, - "loss": 4.1008, - "step": 5530 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005886348005076913, - "loss": 4.0013, - "step": 5535 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005886123608775821, - "loss": 4.0819, - "step": 5540 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005885898995453024, - "loss": 3.9572, - "step": 5545 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005885674165125415, - "loss": 4.0845, - "step": 5550 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005885449117809896, - "loss": 4.0497, - "step": 5555 - }, - { - "epoch": 0.1, - "learning_rate": 0.000588522385352339, - "loss": 4.1316, - "step": 5560 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005884998372282838, - "loss": 4.0639, - "step": 5565 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005884772674105193, - "loss": 4.1424, - "step": 5570 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005884546759007428, - "loss": 4.1344, - "step": 5575 - }, - { - "epoch": 0.1, - "learning_rate": 0.000588432062700653, - "loss": 4.1834, - "step": 5580 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005884094278119502, - "loss": 4.0697, - "step": 5585 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005883867712363365, - "loss": 4.1778, - "step": 5590 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005883640929755156, - "loss": 3.9826, - "step": 5595 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005883413930311928, - "loss": 4.1734, - "step": 5600 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005883186714050749, - "loss": 3.9475, - "step": 5605 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005882959280988705, - "loss": 4.0677, - "step": 5610 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005882731631142898, - "loss": 3.981, - "step": 5615 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005882503764530447, - "loss": 4.1465, - "step": 5620 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005882275681168485, - "loss": 4.1284, - "step": 5625 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005882047381074163, - "loss": 4.128, - "step": 5630 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005881818864264648, - "loss": 3.9485, - "step": 5635 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005881590130757124, - "loss": 4.0819, - "step": 5640 - }, - { - "epoch": 0.1, - "learning_rate": 0.000588136118056879, - "loss": 4.0794, - "step": 5645 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005881132013716862, - "loss": 4.0057, - "step": 5650 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005880902630218571, - "loss": 4.126, - "step": 5655 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005880673030091169, - "loss": 4.1839, - "step": 5660 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005880443213351917, - "loss": 4.0048, - "step": 5665 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005880213180018097, - "loss": 3.9995, - "step": 5670 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005879982930107007, - "loss": 4.047, - "step": 5675 - }, - { - "epoch": 0.1, - "learning_rate": 0.000587975246363596, - "loss": 4.0557, - "step": 5680 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005879521780622287, - "loss": 3.9903, - "step": 5685 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005879290881083333, - "loss": 4.0698, - "step": 5690 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005879059765036461, - "loss": 4.0808, - "step": 5695 - }, - { - "epoch": 0.1, - "learning_rate": 0.000587882843249905, - "loss": 3.9814, - "step": 5700 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005878596883488493, - "loss": 4.0462, - "step": 5705 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005878365118022205, - "loss": 4.057, - "step": 5710 - }, - { - "epoch": 0.1, - "learning_rate": 0.000587813313611761, - "loss": 4.0648, - "step": 5715 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005877900937792154, - "loss": 4.0972, - "step": 5720 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005877668523063297, - "loss": 4.1129, - "step": 5725 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005877435891948514, - "loss": 4.055, - "step": 5730 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005877203044465299, - "loss": 4.0274, - "step": 5735 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005876969980631161, - "loss": 4.1068, - "step": 5740 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005876736700463623, - "loss": 4.0272, - "step": 5745 - }, - { - "epoch": 0.1, - "learning_rate": 0.000587650320398023, - "loss": 4.0066, - "step": 5750 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005876269491198536, - "loss": 4.049, - "step": 5755 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005876035562136119, - "loss": 3.9857, - "step": 5760 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005875801416810566, - "loss": 3.9977, - "step": 5765 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005875567055239485, - "loss": 3.978, - "step": 5770 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005875332477440499, - "loss": 3.966, - "step": 5775 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005875097683431245, - "loss": 3.9962, - "step": 5780 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005874862673229381, - "loss": 4.0646, - "step": 5785 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005874627446852578, - "loss": 4.0243, - "step": 5790 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005874392004318521, - "loss": 3.9927, - "step": 5795 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005874156345644919, - "loss": 4.0585, - "step": 5800 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005873920470849488, - "loss": 4.0704, - "step": 5805 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005873684379949967, - "loss": 4.1157, - "step": 5810 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005873448072964108, - "loss": 4.0976, - "step": 5815 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005873211549909679, - "loss": 4.0073, - "step": 5820 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005872974810804467, - "loss": 3.999, - "step": 5825 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005872737855666273, - "loss": 4.0376, - "step": 5830 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005872500684512915, - "loss": 4.0754, - "step": 5835 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005872263297362227, - "loss": 4.0787, - "step": 5840 - }, - { - "epoch": 0.1, - "learning_rate": 0.000587202569423206, - "loss": 4.0246, - "step": 5845 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005871787875140278, - "loss": 4.0841, - "step": 5850 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005871549840104766, - "loss": 4.0973, - "step": 5855 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005871311589143424, - "loss": 4.1085, - "step": 5860 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005871073122274162, - "loss": 4.0267, - "step": 5865 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005870834439514919, - "loss": 4.0323, - "step": 5870 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005870595540883638, - "loss": 4.0608, - "step": 5875 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005870356426398283, - "loss": 4.0103, - "step": 5880 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005870117096076835, - "loss": 3.9965, - "step": 5885 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005869877549937291, - "loss": 4.087, - "step": 5890 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005869637787997663, - "loss": 4.0467, - "step": 5895 - }, - { - "epoch": 0.1, - "learning_rate": 0.000586939781027598, - "loss": 4.0311, - "step": 5900 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005869157616790287, - "loss": 4.0099, - "step": 5905 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005868917207558646, - "loss": 4.0817, - "step": 5910 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005868676582599134, - "loss": 3.966, - "step": 5915 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005868435741929845, - "loss": 4.0285, - "step": 5920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005868194685568886, - "loss": 3.9462, - "step": 5925 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005867953413534389, - "loss": 4.0755, - "step": 5930 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005867711925844492, - "loss": 3.9712, - "step": 5935 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005867470222517355, - "loss": 3.9844, - "step": 5940 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005867228303571153, - "loss": 3.9981, - "step": 5945 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005866986169024077, - "loss": 4.0011, - "step": 5950 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005866743818894334, - "loss": 3.9929, - "step": 5955 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005866501253200148, - "loss": 4.0131, - "step": 5960 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005866258471959758, - "loss": 4.0858, - "step": 5965 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005866015475191421, - "loss": 4.0778, - "step": 5970 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005865772262913407, - "loss": 4.0723, - "step": 5975 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005865528835144007, - "loss": 4.0553, - "step": 5980 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005865285191901524, - "loss": 4.0564, - "step": 5985 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005865041333204278, - "loss": 3.9492, - "step": 5990 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005864797259070607, - "loss": 3.9973, - "step": 5995 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005864552969518865, - "loss": 4.0681, - "step": 6000 - }, - { - "epoch": 0.1, - "eval_loss": 4.073433876037598, - "eval_runtime": 149.973, - "eval_samples_per_second": 12.276, - "eval_steps_per_second": 0.773, - "step": 6000 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005864308464567419, - "loss": 4.0337, - "step": 6005 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005864063744234657, - "loss": 4.0838, - "step": 6010 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005863818808538978, - "loss": 4.003, - "step": 6015 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005863573657498803, - "loss": 4.0435, - "step": 6020 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005863328291132564, - "loss": 4.0459, - "step": 6025 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005863082709458711, - "loss": 4.0842, - "step": 6030 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005862836912495713, - "loss": 4.0807, - "step": 6035 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005862590900262049, - "loss": 4.1169, - "step": 6040 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005862344672776221, - "loss": 4.1037, - "step": 6045 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005862098230056743, - "loss": 3.976, - "step": 6050 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005861851572122146, - "loss": 4.0173, - "step": 6055 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005861604698990978, - "loss": 4.0725, - "step": 6060 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005861357610681802, - "loss": 4.0116, - "step": 6065 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005861110307213197, - "loss": 4.0785, - "step": 6070 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005860862788603761, - "loss": 4.0062, - "step": 6075 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005860615054872104, - "loss": 4.0221, - "step": 6080 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005860367106036854, - "loss": 4.0759, - "step": 6085 - }, - { - "epoch": 0.11, - "learning_rate": 0.000586011894211666, - "loss": 4.0736, - "step": 6090 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005859870563130177, - "loss": 4.1106, - "step": 6095 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005859621969096085, - "loss": 3.976, - "step": 6100 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005859373160033076, - "loss": 3.9863, - "step": 6105 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005859124135959861, - "loss": 4.0505, - "step": 6110 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005858874896895163, - "loss": 4.0183, - "step": 6115 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005858625442857722, - "loss": 3.9544, - "step": 6120 - }, - { - "epoch": 0.11, - "learning_rate": 0.00058583757738663, - "loss": 4.0514, - "step": 6125 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005858125889939669, - "loss": 3.9714, - "step": 6130 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005857875791096617, - "loss": 4.0567, - "step": 6135 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005857625477355954, - "loss": 4.0103, - "step": 6140 - }, - { - "epoch": 0.11, - "learning_rate": 0.00058573749487365, - "loss": 3.9502, - "step": 6145 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005857124205257091, - "loss": 4.0495, - "step": 6150 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005856873246936588, - "loss": 4.1271, - "step": 6155 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005856622073793856, - "loss": 4.0907, - "step": 6160 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005856370685847784, - "loss": 4.0462, - "step": 6165 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005856119083117276, - "loss": 4.026, - "step": 6170 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005855867265621249, - "loss": 4.027, - "step": 6175 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005855615233378641, - "loss": 3.9725, - "step": 6180 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005855362986408401, - "loss": 4.0381, - "step": 6185 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005855110524729499, - "loss": 4.0856, - "step": 6190 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005854857848360917, - "loss": 4.059, - "step": 6195 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005854604957321657, - "loss": 4.0826, - "step": 6200 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005854351851630732, - "loss": 4.0564, - "step": 6205 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005854098531307177, - "loss": 3.918, - "step": 6210 - }, - { - "epoch": 0.11, - "learning_rate": 0.000585384499637004, - "loss": 4.0235, - "step": 6215 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005853591246838384, - "loss": 4.0396, - "step": 6220 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005853337282731291, - "loss": 4.0262, - "step": 6225 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005853083104067858, - "loss": 4.0257, - "step": 6230 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005852828710867197, - "loss": 3.9727, - "step": 6235 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005852574103148437, - "loss": 4.0742, - "step": 6240 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005852319280930724, - "loss": 4.0149, - "step": 6245 - }, - { - "epoch": 0.11, - "learning_rate": 0.000585206424423322, - "loss": 4.0074, - "step": 6250 - }, - { - "epoch": 0.11, - "learning_rate": 0.00058518089930751, - "loss": 4.0104, - "step": 6255 - }, - { - "epoch": 0.11, - "learning_rate": 0.000585155352747556, - "loss": 4.0947, - "step": 6260 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005851297847453808, - "loss": 4.0355, - "step": 6265 - }, - { - "epoch": 0.11, - "learning_rate": 0.000585104195302907, - "loss": 4.1322, - "step": 6270 - }, - { - "epoch": 0.11, - "learning_rate": 0.000585078584422059, - "loss": 4.2945, - "step": 6275 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005850529521047623, - "loss": 4.0278, - "step": 6280 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005850272983529446, - "loss": 4.0641, - "step": 6285 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005850016231685348, - "loss": 4.0774, - "step": 6290 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005849759265534635, - "loss": 4.0438, - "step": 6295 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584950208509663, - "loss": 4.0762, - "step": 6300 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584924469039067, - "loss": 3.9799, - "step": 6305 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005848987081436115, - "loss": 4.0686, - "step": 6310 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005848729258252329, - "loss": 3.993, - "step": 6315 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005848471220858705, - "loss": 3.9435, - "step": 6320 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005848212969274643, - "loss": 4.056, - "step": 6325 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005847954503519561, - "loss": 4.0116, - "step": 6330 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005847695823612897, - "loss": 3.9138, - "step": 6335 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005847436929574101, - "loss": 4.0694, - "step": 6340 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584717782142264, - "loss": 4.1302, - "step": 6345 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005846918499177998, - "loss": 4.1207, - "step": 6350 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005846658962859677, - "loss": 4.0034, - "step": 6355 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584639921248719, - "loss": 3.8664, - "step": 6360 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584613924808007, - "loss": 3.914, - "step": 6365 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005845879069657865, - "loss": 3.9682, - "step": 6370 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005845618677240138, - "loss": 3.9832, - "step": 6375 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584535807084647, - "loss": 3.9809, - "step": 6380 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005845097250496457, - "loss": 3.9714, - "step": 6385 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005844836216209714, - "loss": 3.992, - "step": 6390 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005844574968005865, - "loss": 3.9963, - "step": 6395 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005844313505904557, - "loss": 4.062, - "step": 6400 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005844051829925451, - "loss": 3.9474, - "step": 6405 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005843789940088223, - "loss": 3.9946, - "step": 6410 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005843527836412565, - "loss": 4.0315, - "step": 6415 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005843265518918185, - "loss": 4.0435, - "step": 6420 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005843002987624812, - "loss": 4.0449, - "step": 6425 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005842740242552182, - "loss": 3.9634, - "step": 6430 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005842477283720058, - "loss": 4.1614, - "step": 6435 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005842214111148207, - "loss": 4.0237, - "step": 6440 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005841950724856422, - "loss": 4.0143, - "step": 6445 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005841687124864506, - "loss": 4.0254, - "step": 6450 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005841423311192283, - "loss": 3.9378, - "step": 6455 - }, - { - "epoch": 0.11, - "learning_rate": 0.000584115928385959, - "loss": 4.0162, - "step": 6460 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005840895042886278, - "loss": 4.0237, - "step": 6465 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005840630588292218, - "loss": 3.951, - "step": 6470 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005840365920097296, - "loss": 3.9824, - "step": 6475 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005840101038321414, - "loss": 3.8698, - "step": 6480 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005839835942984488, - "loss": 3.9789, - "step": 6485 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005839570634106455, - "loss": 3.9527, - "step": 6490 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005839305111707262, - "loss": 4.0282, - "step": 6495 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005839039375806875, - "loss": 4.0262, - "step": 6500 - }, - { - "epoch": 0.11, - "eval_loss": 4.031991481781006, - "eval_runtime": 150.4726, - "eval_samples_per_second": 12.235, - "eval_steps_per_second": 0.771, - "step": 6500 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005838773426425277, - "loss": 3.972, - "step": 6505 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005838507263582467, - "loss": 3.908, - "step": 6510 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005838240887298457, - "loss": 3.945, - "step": 6515 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005837974297593278, - "loss": 3.9304, - "step": 6520 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005837707494486976, - "loss": 4.0567, - "step": 6525 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005837440477999613, - "loss": 4.0024, - "step": 6530 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005837173248151269, - "loss": 4.0126, - "step": 6535 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005836905804962036, - "loss": 3.9493, - "step": 6540 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005836638148452027, - "loss": 4.0822, - "step": 6545 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005836370278641365, - "loss": 3.9565, - "step": 6550 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005836102195550195, - "loss": 4.0498, - "step": 6555 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005835833899198675, - "loss": 4.138, - "step": 6560 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005835565389606978, - "loss": 4.028, - "step": 6565 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005835296666795297, - "loss": 3.8704, - "step": 6570 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005835027730783836, - "loss": 4.0168, - "step": 6575 - }, - { - "epoch": 0.11, - "learning_rate": 0.000583475858159282, - "loss": 3.9846, - "step": 6580 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005834489219242486, - "loss": 3.9255, - "step": 6585 - }, - { - "epoch": 0.11, - "learning_rate": 0.000583421964375309, - "loss": 3.9709, - "step": 6590 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005833949855144901, - "loss": 3.971, - "step": 6595 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005833679853438208, - "loss": 4.0187, - "step": 6600 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005833409638653311, - "loss": 4.0978, - "step": 6605 - }, - { - "epoch": 0.11, - "learning_rate": 0.000583313921081053, - "loss": 4.0233, - "step": 6610 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005832868569930201, - "loss": 3.9346, - "step": 6615 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005832597716032673, - "loss": 3.9806, - "step": 6620 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005832326649138315, - "loss": 3.9501, - "step": 6625 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005832055369267507, - "loss": 3.8945, - "step": 6630 - }, - { - "epoch": 0.11, - "learning_rate": 0.000583178387644065, - "loss": 4.0411, - "step": 6635 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005831512170678157, - "loss": 4.033, - "step": 6640 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005831240252000461, - "loss": 3.9629, - "step": 6645 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005830968120428008, - "loss": 3.9754, - "step": 6650 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005830695775981261, - "loss": 3.961, - "step": 6655 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005830423218680698, - "loss": 3.8399, - "step": 6660 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005830150448546816, - "loss": 3.8657, - "step": 6665 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005829877465600122, - "loss": 3.9245, - "step": 6670 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005829604269861149, - "loss": 4.0341, - "step": 6675 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005829330861350433, - "loss": 3.9792, - "step": 6680 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005829057240088538, - "loss": 4.0263, - "step": 6685 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005828783406096037, - "loss": 3.9451, - "step": 6690 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005828509359393522, - "loss": 3.9552, - "step": 6695 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005828235100001597, - "loss": 4.0922, - "step": 6700 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005827960627940888, - "loss": 3.9879, - "step": 6705 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005827685943232034, - "loss": 4.0245, - "step": 6710 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005827411045895687, - "loss": 4.0227, - "step": 6715 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005827135935952521, - "loss": 3.8635, - "step": 6720 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005826860613423221, - "loss": 3.9943, - "step": 6725 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005826585078328489, - "loss": 4.047, - "step": 6730 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005826309330689047, - "loss": 3.9673, - "step": 6735 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005826033370525627, - "loss": 3.9391, - "step": 6740 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005825757197858981, - "loss": 4.0063, - "step": 6745 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005825480812709876, - "loss": 3.9123, - "step": 6750 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005825204215099093, - "loss": 4.0236, - "step": 6755 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005824927405047432, - "loss": 3.9006, - "step": 6760 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005824650382575709, - "loss": 3.9333, - "step": 6765 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005824373147704753, - "loss": 3.9388, - "step": 6770 - }, - { - "epoch": 0.12, - "learning_rate": 0.000582409570045541, - "loss": 3.9877, - "step": 6775 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005823818040848545, - "loss": 4.0653, - "step": 6780 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005823540168905034, - "loss": 3.9874, - "step": 6785 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005823262084645773, - "loss": 4.0155, - "step": 6790 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005822983788091672, - "loss": 3.9023, - "step": 6795 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005822705279263658, - "loss": 4.119, - "step": 6800 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005822426558182674, - "loss": 3.99, - "step": 6805 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005822147624869676, - "loss": 4.0494, - "step": 6810 - }, - { - "epoch": 0.12, - "learning_rate": 0.000582186847934564, - "loss": 3.9168, - "step": 6815 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005821589121631557, - "loss": 3.8806, - "step": 6820 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005821309551748432, - "loss": 3.7961, - "step": 6825 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005821029769717288, - "loss": 3.7826, - "step": 6830 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005820749775559163, - "loss": 3.9815, - "step": 6835 - }, - { - "epoch": 0.12, - "learning_rate": 0.000582046956929511, - "loss": 3.9875, - "step": 6840 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005820189150946201, - "loss": 4.0475, - "step": 6845 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005819908520533521, - "loss": 3.9757, - "step": 6850 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005819627678078172, - "loss": 4.0318, - "step": 6855 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005819346623601271, - "loss": 3.994, - "step": 6860 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005819065357123955, - "loss": 4.0183, - "step": 6865 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005818783878667371, - "loss": 3.8478, - "step": 6870 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005818502188252684, - "loss": 3.9599, - "step": 6875 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005818220285901078, - "loss": 3.9233, - "step": 6880 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005817938171633749, - "loss": 3.9714, - "step": 6885 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005817655845471912, - "loss": 4.0335, - "step": 6890 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005817373307436795, - "loss": 3.9459, - "step": 6895 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005817090557549644, - "loss": 3.8784, - "step": 6900 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005816807595831721, - "loss": 3.9412, - "step": 6905 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005816524422304303, - "loss": 3.8951, - "step": 6910 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005816241036988683, - "loss": 3.9854, - "step": 6915 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005815957439906169, - "loss": 3.8417, - "step": 6920 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005815673631078088, - "loss": 3.9827, - "step": 6925 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005815389610525778, - "loss": 4.0293, - "step": 6930 - }, - { - "epoch": 0.12, - "learning_rate": 0.00058151053782706, - "loss": 3.9579, - "step": 6935 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005814820934333925, - "loss": 4.0445, - "step": 6940 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005814536278737141, - "loss": 3.9389, - "step": 6945 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005814251411501652, - "loss": 3.947, - "step": 6950 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005813966332648881, - "loss": 3.9215, - "step": 6955 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005813681042200262, - "loss": 3.8809, - "step": 6960 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005813395540177249, - "loss": 3.9399, - "step": 6965 - }, - { - "epoch": 0.12, - "learning_rate": 0.000581310982660131, - "loss": 3.906, - "step": 6970 - }, - { - "epoch": 0.12, - "learning_rate": 0.000581282390149393, - "loss": 3.9693, - "step": 6975 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005812537764876606, - "loss": 3.9784, - "step": 6980 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005812251416770857, - "loss": 4.0167, - "step": 6985 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005811964857198214, - "loss": 4.0349, - "step": 6990 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005811678086180225, - "loss": 3.9012, - "step": 6995 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005811391103738454, - "loss": 4.0148, - "step": 7000 - }, - { - "epoch": 0.12, - "eval_loss": 3.98817777633667, - "eval_runtime": 150.0793, - "eval_samples_per_second": 12.267, - "eval_steps_per_second": 0.773, - "step": 7000 - }, - { - "epoch": 0.12, - "learning_rate": 0.000581110390989448, - "loss": 3.9029, - "step": 7005 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005810816504669897, - "loss": 4.0553, - "step": 7010 - }, - { - "epoch": 0.12, - "learning_rate": 0.000581052888808632, - "loss": 4.097, - "step": 7015 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005810241060165374, - "loss": 3.9799, - "step": 7020 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005809953020928703, - "loss": 3.9113, - "step": 7025 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005809664770397964, - "loss": 3.9452, - "step": 7030 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005809376308594835, - "loss": 3.9655, - "step": 7035 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005809087635541005, - "loss": 3.8256, - "step": 7040 - }, - { - "epoch": 0.12, - "learning_rate": 0.000580879875125818, - "loss": 3.9917, - "step": 7045 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005808509655768086, - "loss": 3.9427, - "step": 7050 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005808220349092458, - "loss": 4.0305, - "step": 7055 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005807930831253052, - "loss": 3.9139, - "step": 7060 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005807641102271637, - "loss": 3.9324, - "step": 7065 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005807351162170001, - "loss": 3.9821, - "step": 7070 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005807061010969944, - "loss": 3.9877, - "step": 7075 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005806770648693286, - "loss": 4.0239, - "step": 7080 - }, - { - "epoch": 0.12, - "learning_rate": 0.000580648007536186, - "loss": 3.9869, - "step": 7085 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005806189290997514, - "loss": 3.9419, - "step": 7090 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005805898295622115, - "loss": 4.0456, - "step": 7095 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005805607089257545, - "loss": 3.9479, - "step": 7100 - }, - { - "epoch": 0.12, - "learning_rate": 0.00058053156719257, - "loss": 3.9563, - "step": 7105 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005805024043648493, - "loss": 3.9586, - "step": 7110 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005804732204447854, - "loss": 3.9508, - "step": 7115 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005804440154345728, - "loss": 3.9228, - "step": 7120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005804147893364073, - "loss": 3.9427, - "step": 7125 - }, - { - "epoch": 0.12, - "learning_rate": 0.000580385542152487, - "loss": 3.9727, - "step": 7130 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005803562738850107, - "loss": 3.8936, - "step": 7135 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005803269845361794, - "loss": 3.8108, - "step": 7140 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005802976741081956, - "loss": 3.943, - "step": 7145 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005802683426032631, - "loss": 3.9068, - "step": 7150 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005802389900235878, - "loss": 3.931, - "step": 7155 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005802096163713764, - "loss": 4.0288, - "step": 7160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005801802216488381, - "loss": 3.8147, - "step": 7165 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005801508058581829, - "loss": 3.8442, - "step": 7170 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005801213690016228, - "loss": 3.9372, - "step": 7175 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005800919110813716, - "loss": 4.0047, - "step": 7180 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005800624320996439, - "loss": 3.9978, - "step": 7185 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005800329320586568, - "loss": 3.9016, - "step": 7190 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005800034109606282, - "loss": 3.9061, - "step": 7195 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005799738688077781, - "loss": 3.9114, - "step": 7200 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005799443056023279, - "loss": 3.8848, - "step": 7205 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005799147213465006, - "loss": 3.9432, - "step": 7210 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005798851160425209, - "loss": 3.9281, - "step": 7215 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005798554896926149, - "loss": 3.8136, - "step": 7220 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005798258422990101, - "loss": 3.9658, - "step": 7225 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005797961738639362, - "loss": 3.9506, - "step": 7230 - }, - { - "epoch": 0.13, - "learning_rate": 0.000579766484389624, - "loss": 4.0028, - "step": 7235 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005797367738783059, - "loss": 3.9895, - "step": 7240 - }, - { - "epoch": 0.13, - "learning_rate": 0.000579707042332216, - "loss": 3.9246, - "step": 7245 - }, - { - "epoch": 0.13, - "learning_rate": 0.00057967728975359, - "loss": 3.9121, - "step": 7250 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005796475161446651, - "loss": 3.8231, - "step": 7255 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005796177215076801, - "loss": 3.9446, - "step": 7260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005795879058448756, - "loss": 3.9487, - "step": 7265 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005795580691584934, - "loss": 3.9777, - "step": 7270 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005795282114507772, - "loss": 3.8589, - "step": 7275 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005794983327239719, - "loss": 3.9822, - "step": 7280 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005794684329803244, - "loss": 3.9954, - "step": 7285 - }, - { - "epoch": 0.13, - "learning_rate": 0.000579438512222083, - "loss": 3.9695, - "step": 7290 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005794085704514977, - "loss": 3.9504, - "step": 7295 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005793786076708198, - "loss": 3.9779, - "step": 7300 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005793486238823024, - "loss": 3.8648, - "step": 7305 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005793186190882001, - "loss": 3.9049, - "step": 7310 - }, - { - "epoch": 0.13, - "learning_rate": 0.000579288593290769, - "loss": 3.9022, - "step": 7315 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005792585464922673, - "loss": 3.9056, - "step": 7320 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005792284786949538, - "loss": 3.9115, - "step": 7325 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005791983899010899, - "loss": 3.953, - "step": 7330 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005791682801129379, - "loss": 3.9481, - "step": 7335 - }, - { - "epoch": 0.13, - "learning_rate": 0.000579138149332762, - "loss": 3.8981, - "step": 7340 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005791079975628277, - "loss": 4.0269, - "step": 7345 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005790778248054025, - "loss": 3.9883, - "step": 7350 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005790476310627551, - "loss": 3.9876, - "step": 7355 - }, - { - "epoch": 0.13, - "learning_rate": 0.000579017416337156, - "loss": 3.988, - "step": 7360 - }, - { - "epoch": 0.13, - "learning_rate": 0.000578987180630877, - "loss": 3.9376, - "step": 7365 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005789569239461918, - "loss": 3.9417, - "step": 7370 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005789266462853757, - "loss": 4.0789, - "step": 7375 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005788963476507052, - "loss": 3.9385, - "step": 7380 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005788660280444586, - "loss": 3.929, - "step": 7385 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005788356874689159, - "loss": 4.0291, - "step": 7390 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005788053259263587, - "loss": 3.8872, - "step": 7395 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005787749434190695, - "loss": 3.8675, - "step": 7400 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005787445399493335, - "loss": 4.0703, - "step": 7405 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005787141155194365, - "loss": 3.9236, - "step": 7410 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005786836701316665, - "loss": 3.9127, - "step": 7415 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005786532037883128, - "loss": 3.8897, - "step": 7420 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005786227164916662, - "loss": 3.9272, - "step": 7425 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005785922082440192, - "loss": 3.9871, - "step": 7430 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005785616790476659, - "loss": 3.9186, - "step": 7435 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005785311289049019, - "loss": 3.974, - "step": 7440 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005785005578180245, - "loss": 3.9538, - "step": 7445 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005784699657893325, - "loss": 4.0123, - "step": 7450 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005784393528211261, - "loss": 3.9068, - "step": 7455 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005784087189157074, - "loss": 3.7432, - "step": 7460 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005783780640753798, - "loss": 3.9067, - "step": 7465 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005783473883024486, - "loss": 3.9339, - "step": 7470 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005783166915992202, - "loss": 3.8964, - "step": 7475 - }, - { - "epoch": 0.13, - "learning_rate": 0.000578285973968003, - "loss": 3.8972, - "step": 7480 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005782552354111067, - "loss": 3.9025, - "step": 7485 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005782244759308427, - "loss": 3.979, - "step": 7490 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005781936955295241, - "loss": 3.9776, - "step": 7495 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005781628942094653, - "loss": 3.9969, - "step": 7500 - }, - { - "epoch": 0.13, - "eval_loss": 3.961434841156006, - "eval_runtime": 149.9847, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 7500 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005781320719729822, - "loss": 3.9575, - "step": 7505 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005781012288223929, - "loss": 3.8435, - "step": 7510 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005780703647600165, - "loss": 3.9905, - "step": 7515 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005780394797881735, - "loss": 3.8025, - "step": 7520 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005780085739091867, - "loss": 3.9212, - "step": 7525 - }, - { - "epoch": 0.13, - "learning_rate": 0.00057797764712538, - "loss": 3.8277, - "step": 7530 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005779466994390788, - "loss": 3.8959, - "step": 7535 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005779157308526102, - "loss": 3.9607, - "step": 7540 - }, - { - "epoch": 0.13, - "learning_rate": 0.000577884741368303, - "loss": 3.9904, - "step": 7545 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005778537309884873, - "loss": 4.0796, - "step": 7550 - }, - { - "epoch": 0.13, - "learning_rate": 0.000577822699715495, - "loss": 3.893, - "step": 7555 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005777916475516597, - "loss": 3.953, - "step": 7560 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005777605744993159, - "loss": 3.9167, - "step": 7565 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005777294805608006, - "loss": 3.8872, - "step": 7570 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005776983657384515, - "loss": 3.9342, - "step": 7575 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005776672300346086, - "loss": 3.9996, - "step": 7580 - }, - { - "epoch": 0.13, - "learning_rate": 0.000577636073451613, - "loss": 3.9381, - "step": 7585 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005776048959918076, - "loss": 4.0558, - "step": 7590 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005775736976575367, - "loss": 3.904, - "step": 7595 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005775424784511462, - "loss": 3.9933, - "step": 7600 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005775112383749839, - "loss": 3.995, - "step": 7605 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005774799774313985, - "loss": 3.9822, - "step": 7610 - }, - { - "epoch": 0.13, - "learning_rate": 0.000577448695622741, - "loss": 3.7759, - "step": 7615 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005774173929513633, - "loss": 3.7732, - "step": 7620 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005773860694196195, - "loss": 3.8441, - "step": 7625 - }, - { - "epoch": 0.13, - "learning_rate": 0.000577354725029865, - "loss": 3.9408, - "step": 7630 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005773233597844564, - "loss": 3.9715, - "step": 7635 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005772919736857524, - "loss": 3.8511, - "step": 7640 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005772605667361131, - "loss": 3.9275, - "step": 7645 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005772291389379002, - "loss": 3.8361, - "step": 7650 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005771976902934768, - "loss": 3.8592, - "step": 7655 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005771662208052077, - "loss": 3.8605, - "step": 7660 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005771347304754593, - "loss": 3.885, - "step": 7665 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005771032193065994, - "loss": 3.8989, - "step": 7670 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005770716873009975, - "loss": 3.9576, - "step": 7675 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005770401344610248, - "loss": 3.8623, - "step": 7680 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005770085607890537, - "loss": 3.8787, - "step": 7685 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005769769662874585, - "loss": 3.9269, - "step": 7690 - }, - { - "epoch": 0.13, - "learning_rate": 0.000576945350958615, - "loss": 3.947, - "step": 7695 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005769137148049002, - "loss": 3.869, - "step": 7700 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005768820578286935, - "loss": 3.9183, - "step": 7705 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005768503800323748, - "loss": 3.94, - "step": 7710 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005768186814183265, - "loss": 3.7839, - "step": 7715 - }, - { - "epoch": 0.13, - "learning_rate": 0.000576786961988932, - "loss": 3.8936, - "step": 7720 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005767552217465765, - "loss": 3.9323, - "step": 7725 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005767234606936465, - "loss": 3.8951, - "step": 7730 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005766916788325306, - "loss": 3.8801, - "step": 7735 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005766598761656186, - "loss": 3.9485, - "step": 7740 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005766280526953016, - "loss": 3.9617, - "step": 7745 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005765962084239727, - "loss": 4.004, - "step": 7750 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005765643433540265, - "loss": 3.9779, - "step": 7755 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005765324574878592, - "loss": 3.8105, - "step": 7760 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005765005508278681, - "loss": 3.9296, - "step": 7765 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005764686233764527, - "loss": 3.9916, - "step": 7770 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005764366751360137, - "loss": 3.8613, - "step": 7775 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005764047061089534, - "loss": 3.8955, - "step": 7780 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005763727162976758, - "loss": 3.9348, - "step": 7785 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005763407057045863, - "loss": 3.9172, - "step": 7790 - }, - { - "epoch": 0.13, - "learning_rate": 0.000576308674332092, - "loss": 3.8628, - "step": 7795 - }, - { - "epoch": 0.13, - "learning_rate": 0.0005762766221826015, - "loss": 3.9669, - "step": 7800 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005762445492585247, - "loss": 3.9162, - "step": 7805 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005762124555622737, - "loss": 3.9174, - "step": 7810 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005761803410962615, - "loss": 3.7882, - "step": 7815 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005761482058629032, - "loss": 3.9603, - "step": 7820 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005761160498646149, - "loss": 3.8465, - "step": 7825 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005760838731038147, - "loss": 3.8924, - "step": 7830 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005760516755829225, - "loss": 3.9661, - "step": 7835 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005760194573043586, - "loss": 3.8856, - "step": 7840 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005759872182705463, - "loss": 3.9037, - "step": 7845 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005759549584839096, - "loss": 3.8099, - "step": 7850 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005759226779468742, - "loss": 3.9186, - "step": 7855 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005758903766618676, - "loss": 3.8689, - "step": 7860 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005758580546313185, - "loss": 3.9651, - "step": 7865 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005758257118576574, - "loss": 3.8181, - "step": 7870 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005757933483433165, - "loss": 3.9611, - "step": 7875 - }, - { - "epoch": 0.14, - "learning_rate": 0.000575760964090729, - "loss": 3.8883, - "step": 7880 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005757285591023305, - "loss": 3.9339, - "step": 7885 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005756961333805574, - "loss": 3.9402, - "step": 7890 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005756636869278478, - "loss": 3.9722, - "step": 7895 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005756312197466418, - "loss": 3.8141, - "step": 7900 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005755987318393807, - "loss": 3.851, - "step": 7905 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005755662232085075, - "loss": 3.9061, - "step": 7910 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005755336938564663, - "loss": 3.846, - "step": 7915 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005755011437857037, - "loss": 3.8962, - "step": 7920 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005754685729986668, - "loss": 3.9421, - "step": 7925 - }, - { - "epoch": 0.14, - "learning_rate": 0.000575435981497805, - "loss": 3.8796, - "step": 7930 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005754033692855691, - "loss": 3.8234, - "step": 7935 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005753707363644111, - "loss": 3.8632, - "step": 7940 - }, - { - "epoch": 0.14, - "learning_rate": 0.000575338082736785, - "loss": 3.8991, - "step": 7945 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005753054084051462, - "loss": 4.0148, - "step": 7950 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005752727133719516, - "loss": 3.9543, - "step": 7955 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005752399976396596, - "loss": 3.8603, - "step": 7960 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005752072612107306, - "loss": 3.8823, - "step": 7965 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005751745040876258, - "loss": 3.8419, - "step": 7970 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005751417262728085, - "loss": 3.9414, - "step": 7975 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005751089277687436, - "loss": 3.911, - "step": 7980 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005750761085778972, - "loss": 3.9677, - "step": 7985 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005750432687027372, - "loss": 3.9097, - "step": 7990 - }, - { - "epoch": 0.14, - "learning_rate": 0.000575010408145733, - "loss": 3.827, - "step": 7995 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005749775269093554, - "loss": 3.8829, - "step": 8000 - }, - { - "epoch": 0.14, - "eval_loss": 3.9269142150878906, - "eval_runtime": 150.3729, - "eval_samples_per_second": 12.243, - "eval_steps_per_second": 0.771, - "step": 8000 - }, - { - "epoch": 0.14, - "learning_rate": 0.000574944624996077, - "loss": 3.8017, - "step": 8005 - }, - { - "epoch": 0.14, - "learning_rate": 0.000574911702408372, - "loss": 3.9319, - "step": 8010 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005748787591487158, - "loss": 3.9119, - "step": 8015 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005748457952195856, - "loss": 3.9751, - "step": 8020 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005748128106234602, - "loss": 3.8432, - "step": 8025 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005747798053628198, - "loss": 3.9297, - "step": 8030 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005747467794401464, - "loss": 3.8792, - "step": 8035 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005747137328579231, - "loss": 3.8893, - "step": 8040 - }, - { - "epoch": 0.14, - "learning_rate": 0.000574680665618635, - "loss": 3.8906, - "step": 8045 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005746475777247686, - "loss": 3.8385, - "step": 8050 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005746144691788119, - "loss": 3.7674, - "step": 8055 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005745813399832544, - "loss": 3.8042, - "step": 8060 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005745481901405874, - "loss": 3.9017, - "step": 8065 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005745150196533037, - "loss": 3.8173, - "step": 8070 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005744818285238972, - "loss": 3.8445, - "step": 8075 - }, - { - "epoch": 0.14, - "learning_rate": 0.000574448616754864, - "loss": 3.9037, - "step": 8080 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005744153843487012, - "loss": 3.8863, - "step": 8085 - }, - { - "epoch": 0.14, - "learning_rate": 0.000574382131307908, - "loss": 3.8032, - "step": 8090 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005743488576349846, - "loss": 3.8524, - "step": 8095 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005743155633324332, - "loss": 3.7371, - "step": 8100 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005742822484027573, - "loss": 3.8358, - "step": 8105 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005742489128484619, - "loss": 4.0695, - "step": 8110 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005742155566720538, - "loss": 3.8984, - "step": 8115 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005741821798760412, - "loss": 3.8755, - "step": 8120 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005741487824629339, - "loss": 3.9444, - "step": 8125 - }, - { - "epoch": 0.14, - "learning_rate": 0.000574115364435243, - "loss": 3.9359, - "step": 8130 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005740819257954814, - "loss": 3.9182, - "step": 8135 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005740484665461639, - "loss": 3.9721, - "step": 8140 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005740149866898061, - "loss": 3.8413, - "step": 8145 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005739814862289256, - "loss": 3.9575, - "step": 8150 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005739479651660415, - "loss": 3.8612, - "step": 8155 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005739144235036744, - "loss": 3.8002, - "step": 8160 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005738808612443464, - "loss": 3.8984, - "step": 8165 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005738472783905811, - "loss": 3.8158, - "step": 8170 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005738136749449041, - "loss": 3.8328, - "step": 8175 - }, - { - "epoch": 0.14, - "learning_rate": 0.000573780050909842, - "loss": 3.8829, - "step": 8180 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005737464062879231, - "loss": 3.7717, - "step": 8185 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005737127410816775, - "loss": 3.879, - "step": 8190 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005736790552936364, - "loss": 3.9169, - "step": 8195 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005736453489263329, - "loss": 3.8645, - "step": 8200 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005736116219823016, - "loss": 3.8988, - "step": 8205 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005735778744640787, - "loss": 3.8545, - "step": 8210 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005735441063742016, - "loss": 3.7853, - "step": 8215 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005735103177152096, - "loss": 3.8307, - "step": 8220 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005734765084896435, - "loss": 3.9213, - "step": 8225 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005734426787000453, - "loss": 3.9432, - "step": 8230 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005734088283489593, - "loss": 3.868, - "step": 8235 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005733749574389304, - "loss": 3.827, - "step": 8240 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005733410659725058, - "loss": 3.9695, - "step": 8245 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005733071539522339, - "loss": 3.8528, - "step": 8250 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005732732213806647, - "loss": 3.8285, - "step": 8255 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005732392682603497, - "loss": 3.9186, - "step": 8260 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005732052945938421, - "loss": 3.8465, - "step": 8265 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005731713003836965, - "loss": 3.8373, - "step": 8270 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005731372856324691, - "loss": 3.9738, - "step": 8275 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005731032503427176, - "loss": 3.8417, - "step": 8280 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005730691945170013, - "loss": 3.9056, - "step": 8285 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005730351181578811, - "loss": 3.9564, - "step": 8290 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005730010212679192, - "loss": 3.8577, - "step": 8295 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005729669038496796, - "loss": 3.8578, - "step": 8300 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005729327659057279, - "loss": 3.9018, - "step": 8305 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005728986074386309, - "loss": 3.9108, - "step": 8310 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005728644284509572, - "loss": 3.8183, - "step": 8315 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005728302289452769, - "loss": 3.8654, - "step": 8320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005727960089241615, - "loss": 3.8433, - "step": 8325 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005727617683901845, - "loss": 3.8515, - "step": 8330 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005727275073459202, - "loss": 3.8286, - "step": 8335 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005726932257939452, - "loss": 3.8778, - "step": 8340 - }, - { - "epoch": 0.14, - "learning_rate": 0.000572658923736837, - "loss": 3.7871, - "step": 8345 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005726246011771752, - "loss": 3.9097, - "step": 8350 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005725902581175406, - "loss": 3.837, - "step": 8355 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005725558945605155, - "loss": 3.7465, - "step": 8360 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005725215105086839, - "loss": 3.9166, - "step": 8365 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005724871059646316, - "loss": 3.9286, - "step": 8370 - }, - { - "epoch": 0.14, - "learning_rate": 0.0005724526809309452, - "loss": 3.785, - "step": 8375 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005724182354102136, - "loss": 3.8503, - "step": 8380 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005723837694050267, - "loss": 3.8255, - "step": 8385 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005723492829179764, - "loss": 3.887, - "step": 8390 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005723147759516558, - "loss": 3.7949, - "step": 8395 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005722802485086595, - "loss": 3.8919, - "step": 8400 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005722457005915841, - "loss": 3.8261, - "step": 8405 - }, - { - "epoch": 0.15, - "learning_rate": 0.000572211132203027, - "loss": 3.8836, - "step": 8410 - }, - { - "epoch": 0.15, - "learning_rate": 0.000572176543345588, - "loss": 3.7968, - "step": 8415 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005721419340218678, - "loss": 3.8791, - "step": 8420 - }, - { - "epoch": 0.15, - "learning_rate": 0.000572107304234469, - "loss": 3.8896, - "step": 8425 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005720726539859953, - "loss": 3.8461, - "step": 8430 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005720379832790524, - "loss": 3.8805, - "step": 8435 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005720032921162473, - "loss": 3.7594, - "step": 8440 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005719685805001886, - "loss": 3.906, - "step": 8445 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005719338484334866, - "loss": 3.8963, - "step": 8450 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005718990959187527, - "loss": 3.8806, - "step": 8455 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005718643229586002, - "loss": 3.9259, - "step": 8460 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005718295295556441, - "loss": 3.8151, - "step": 8465 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005717947157125004, - "loss": 3.8314, - "step": 8470 - }, - { - "epoch": 0.15, - "learning_rate": 0.000571759881431787, - "loss": 3.8986, - "step": 8475 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005717250267161232, - "loss": 3.8939, - "step": 8480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00057169015156813, - "loss": 3.9307, - "step": 8485 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005716552559904297, - "loss": 3.7868, - "step": 8490 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005716203399856466, - "loss": 3.8096, - "step": 8495 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005715854035564058, - "loss": 3.8729, - "step": 8500 - }, - { - "epoch": 0.15, - "eval_loss": 3.8896238803863525, - "eval_runtime": 150.0738, - "eval_samples_per_second": 12.267, - "eval_steps_per_second": 0.773, - "step": 8500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005715504467053345, - "loss": 3.7995, - "step": 8505 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005715154694350613, - "loss": 3.8546, - "step": 8510 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005714804717482163, - "loss": 3.8225, - "step": 8515 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005714454536474311, - "loss": 3.866, - "step": 8520 - }, - { - "epoch": 0.15, - "learning_rate": 0.000571410415135339, - "loss": 3.8313, - "step": 8525 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005713753562145746, - "loss": 3.8833, - "step": 8530 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005713402768877743, - "loss": 3.8299, - "step": 8535 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005713051771575756, - "loss": 3.9089, - "step": 8540 - }, - { - "epoch": 0.15, - "learning_rate": 0.000571270057026618, - "loss": 3.8661, - "step": 8545 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005712349164975424, - "loss": 3.7924, - "step": 8550 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005711997555729912, - "loss": 3.9294, - "step": 8555 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005711645742556082, - "loss": 3.7973, - "step": 8560 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005711293725480389, - "loss": 3.7566, - "step": 8565 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005710941504529304, - "loss": 3.9148, - "step": 8570 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005710589079729309, - "loss": 3.8453, - "step": 8575 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005710236451106909, - "loss": 3.8899, - "step": 8580 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005709883618688617, - "loss": 3.8985, - "step": 8585 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005709530582500964, - "loss": 3.8628, - "step": 8590 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005709177342570498, - "loss": 3.7169, - "step": 8595 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005708823898923779, - "loss": 3.8679, - "step": 8600 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005708470251587387, - "loss": 3.9674, - "step": 8605 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005708116400587912, - "loss": 3.9565, - "step": 8610 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005707762345951963, - "loss": 3.786, - "step": 8615 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005707408087706163, - "loss": 3.8328, - "step": 8620 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005707053625877149, - "loss": 3.8651, - "step": 8625 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005706698960491576, - "loss": 3.8354, - "step": 8630 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005706344091576113, - "loss": 3.9148, - "step": 8635 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005705989019157443, - "loss": 3.743, - "step": 8640 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005705633743262269, - "loss": 3.9586, - "step": 8645 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005705278263917301, - "loss": 3.9242, - "step": 8650 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005704922581149275, - "loss": 3.7486, - "step": 8655 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005704566694984931, - "loss": 3.8465, - "step": 8660 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005704210605451033, - "loss": 3.9023, - "step": 8665 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005703854312574357, - "loss": 3.9225, - "step": 8670 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005703497816381692, - "loss": 3.9354, - "step": 8675 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005703141116899847, - "loss": 3.8554, - "step": 8680 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005702784214155645, - "loss": 3.8595, - "step": 8685 - }, - { - "epoch": 0.15, - "learning_rate": 0.000570242710817592, - "loss": 3.8911, - "step": 8690 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005702069798987526, - "loss": 3.9046, - "step": 8695 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005701712286617332, - "loss": 3.9157, - "step": 8700 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005701354571092219, - "loss": 3.7783, - "step": 8705 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005700996652439088, - "loss": 3.8195, - "step": 8710 - }, - { - "epoch": 0.15, - "learning_rate": 0.000570063853068485, - "loss": 3.8212, - "step": 8715 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005700280205856435, - "loss": 3.8575, - "step": 8720 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005699921677980788, - "loss": 3.8246, - "step": 8725 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005699562947084867, - "loss": 3.7655, - "step": 8730 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005699204013195648, - "loss": 3.6899, - "step": 8735 - }, - { - "epoch": 0.15, - "learning_rate": 0.000569884487634012, - "loss": 3.8449, - "step": 8740 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005698485536545289, - "loss": 3.8651, - "step": 8745 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005698125993838176, - "loss": 3.8758, - "step": 8750 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005697766248245813, - "loss": 3.7971, - "step": 8755 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005697406299795257, - "loss": 3.8458, - "step": 8760 - }, - { - "epoch": 0.15, - "learning_rate": 0.000569704614851357, - "loss": 3.8787, - "step": 8765 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005696685794427835, - "loss": 3.7689, - "step": 8770 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005696325237565148, - "loss": 3.8765, - "step": 8775 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005695964477952623, - "loss": 3.8841, - "step": 8780 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005695603515617384, - "loss": 3.8682, - "step": 8785 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005695242350586576, - "loss": 3.9042, - "step": 8790 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005694880982887356, - "loss": 3.8366, - "step": 8795 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005694519412546899, - "loss": 3.72, - "step": 8800 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005694157639592388, - "loss": 3.8754, - "step": 8805 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005693795664051032, - "loss": 3.7116, - "step": 8810 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005693433485950048, - "loss": 3.8559, - "step": 8815 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005693071105316668, - "loss": 3.8147, - "step": 8820 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005692708522178144, - "loss": 3.8115, - "step": 8825 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005692345736561738, - "loss": 3.8211, - "step": 8830 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005691982748494732, - "loss": 3.7904, - "step": 8835 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005691619558004418, - "loss": 3.725, - "step": 8840 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005691256165118109, - "loss": 3.7322, - "step": 8845 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005690892569863127, - "loss": 3.8021, - "step": 8850 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005690528772266816, - "loss": 3.8315, - "step": 8855 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005690164772356529, - "loss": 3.8825, - "step": 8860 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005689800570159639, - "loss": 3.9128, - "step": 8865 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005689436165703531, - "loss": 3.7504, - "step": 8870 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005689071559015605, - "loss": 3.8452, - "step": 8875 - }, - { - "epoch": 0.15, - "learning_rate": 0.000568870675012328, - "loss": 3.8525, - "step": 8880 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005688341739053988, - "loss": 3.8366, - "step": 8885 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005687976525835173, - "loss": 3.9315, - "step": 8890 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005687611110494299, - "loss": 3.863, - "step": 8895 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005687245493058845, - "loss": 3.9057, - "step": 8900 - }, - { - "epoch": 0.15, - "learning_rate": 0.00056868796735563, - "loss": 3.8194, - "step": 8905 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005686513652014175, - "loss": 3.8638, - "step": 8910 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005686147428459991, - "loss": 3.8555, - "step": 8915 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005685781002921286, - "loss": 3.7245, - "step": 8920 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005685414375425615, - "loss": 3.8012, - "step": 8925 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005685047546000544, - "loss": 3.8829, - "step": 8930 - }, - { - "epoch": 0.15, - "learning_rate": 0.000568468051467366, - "loss": 3.7756, - "step": 8935 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005684313281472559, - "loss": 3.8913, - "step": 8940 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005683945846424856, - "loss": 3.7455, - "step": 8945 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005683578209558181, - "loss": 3.7563, - "step": 8950 - }, - { - "epoch": 0.15, - "learning_rate": 0.0005683210370900177, - "loss": 3.8759, - "step": 8955 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005682842330478505, - "loss": 3.8912, - "step": 8960 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005682474088320839, - "loss": 3.9026, - "step": 8965 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005682105644454869, - "loss": 3.8464, - "step": 8970 - }, - { - "epoch": 0.16, - "learning_rate": 0.00056817369989083, - "loss": 3.8083, - "step": 8975 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005681368151708852, - "loss": 3.8982, - "step": 8980 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005680999102884261, - "loss": 3.8016, - "step": 8985 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005680629852462278, - "loss": 3.9751, - "step": 8990 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005680260400470668, - "loss": 3.8363, - "step": 8995 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005679890746937211, - "loss": 3.8684, - "step": 9000 - }, - { - "epoch": 0.16, - "eval_loss": 3.870915412902832, - "eval_runtime": 150.275, - "eval_samples_per_second": 12.251, - "eval_steps_per_second": 0.772, - "step": 9000 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005679520891889705, - "loss": 3.9171, - "step": 9005 - }, - { - "epoch": 0.16, - "learning_rate": 0.000567915083535596, - "loss": 3.9465, - "step": 9010 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005678780577363802, - "loss": 3.9283, - "step": 9015 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005678410117941075, - "loss": 3.8472, - "step": 9020 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005678039457115632, - "loss": 3.9198, - "step": 9025 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005677668594915347, - "loss": 3.8925, - "step": 9030 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005677297531368106, - "loss": 3.893, - "step": 9035 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005676926266501812, - "loss": 3.8164, - "step": 9040 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005676554800344382, - "loss": 3.8225, - "step": 9045 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005676183132923748, - "loss": 3.8328, - "step": 9050 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005675811264267858, - "loss": 3.8067, - "step": 9055 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005675439194404673, - "loss": 3.8443, - "step": 9060 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005675066923362174, - "loss": 3.7139, - "step": 9065 - }, - { - "epoch": 0.16, - "learning_rate": 0.000567469445116835, - "loss": 3.733, - "step": 9070 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005674321777851211, - "loss": 3.7259, - "step": 9075 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005673948903438781, - "loss": 3.8794, - "step": 9080 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005673575827959097, - "loss": 3.7507, - "step": 9085 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005673202551440212, - "loss": 3.8077, - "step": 9090 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005672829073910196, - "loss": 3.778, - "step": 9095 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005672455395397132, - "loss": 3.8871, - "step": 9100 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005672081515929118, - "loss": 3.8326, - "step": 9105 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005671707435534267, - "loss": 3.8089, - "step": 9110 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005671333154240709, - "loss": 3.8371, - "step": 9115 - }, - { - "epoch": 0.16, - "learning_rate": 0.000567095867207659, - "loss": 3.7742, - "step": 9120 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005670583989070066, - "loss": 3.8002, - "step": 9125 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005670209105249313, - "loss": 3.7759, - "step": 9130 - }, - { - "epoch": 0.16, - "learning_rate": 0.000566983402064252, - "loss": 3.8101, - "step": 9135 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005669458735277891, - "loss": 3.8326, - "step": 9140 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005669083249183646, - "loss": 3.9168, - "step": 9145 - }, - { - "epoch": 0.16, - "learning_rate": 0.000566870756238802, - "loss": 3.8423, - "step": 9150 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005668331674919261, - "loss": 3.7503, - "step": 9155 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005667955586805635, - "loss": 3.9171, - "step": 9160 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005667579298075424, - "loss": 3.844, - "step": 9165 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005667202808756919, - "loss": 3.9111, - "step": 9170 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005666826118878432, - "loss": 3.7965, - "step": 9175 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005666449228468288, - "loss": 3.8016, - "step": 9180 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005666072137554829, - "loss": 3.7283, - "step": 9185 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005665694846166408, - "loss": 3.7984, - "step": 9190 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005665317354331395, - "loss": 3.7922, - "step": 9195 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005664939662078178, - "loss": 3.7183, - "step": 9200 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005664561769435155, - "loss": 3.9538, - "step": 9205 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005664183676430743, - "loss": 3.7491, - "step": 9210 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005663805383093372, - "loss": 3.8107, - "step": 9215 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005663426889451488, - "loss": 3.8527, - "step": 9220 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005663048195533552, - "loss": 3.8206, - "step": 9225 - }, - { - "epoch": 0.16, - "learning_rate": 0.000566266930136804, - "loss": 3.8189, - "step": 9230 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005662290206983441, - "loss": 3.7953, - "step": 9235 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005661910912408264, - "loss": 3.8083, - "step": 9240 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005661531417671027, - "loss": 3.7971, - "step": 9245 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005661151722800269, - "loss": 3.6886, - "step": 9250 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005660771827824539, - "loss": 3.7777, - "step": 9255 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005660391732772404, - "loss": 3.6483, - "step": 9260 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005660011437672445, - "loss": 3.8698, - "step": 9265 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005659630942553258, - "loss": 3.8082, - "step": 9270 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005659250247443454, - "loss": 3.8567, - "step": 9275 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005658869352371661, - "loss": 3.8349, - "step": 9280 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005658488257366518, - "loss": 3.8353, - "step": 9285 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005658106962456684, - "loss": 3.7528, - "step": 9290 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005657725467670828, - "loss": 3.7924, - "step": 9295 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005657343773037637, - "loss": 3.8372, - "step": 9300 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005656961878585813, - "loss": 3.7955, - "step": 9305 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005656579784344074, - "loss": 3.7235, - "step": 9310 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005656197490341149, - "loss": 3.8506, - "step": 9315 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005655814996605785, - "loss": 3.844, - "step": 9320 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005655432303166744, - "loss": 3.7618, - "step": 9325 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005655049410052804, - "loss": 3.7935, - "step": 9330 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005654666317292754, - "loss": 3.9014, - "step": 9335 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005654283024915403, - "loss": 3.829, - "step": 9340 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005653899532949571, - "loss": 3.7477, - "step": 9345 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005653515841424095, - "loss": 3.9187, - "step": 9350 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005653131950367826, - "loss": 3.8836, - "step": 9355 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005652747859809632, - "loss": 3.8617, - "step": 9360 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005652363569778395, - "loss": 3.8054, - "step": 9365 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005651979080303009, - "loss": 3.8272, - "step": 9370 - }, - { - "epoch": 0.16, - "learning_rate": 0.000565159439141239, - "loss": 3.8475, - "step": 9375 - }, - { - "epoch": 0.16, - "learning_rate": 0.000565120950313546, - "loss": 3.823, - "step": 9380 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005650824415501163, - "loss": 3.9115, - "step": 9385 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005650439128538455, - "loss": 3.837, - "step": 9390 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005650053642276307, - "loss": 3.7632, - "step": 9395 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005649667956743708, - "loss": 3.7894, - "step": 9400 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005649282071969658, - "loss": 3.8228, - "step": 9405 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005648895987983172, - "loss": 3.8639, - "step": 9410 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005648509704813284, - "loss": 3.8556, - "step": 9415 - }, - { - "epoch": 0.16, - "learning_rate": 0.000564812322248904, - "loss": 3.856, - "step": 9420 - }, - { - "epoch": 0.16, - "learning_rate": 0.00056477365410395, - "loss": 3.8072, - "step": 9425 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005647349660493743, - "loss": 3.8082, - "step": 9430 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005646962580880858, - "loss": 3.7869, - "step": 9435 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005646575302229953, - "loss": 3.8706, - "step": 9440 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005646187824570148, - "loss": 3.7245, - "step": 9445 - }, - { - "epoch": 0.16, - "learning_rate": 0.000564580014793058, - "loss": 3.7688, - "step": 9450 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005645412272340401, - "loss": 3.7546, - "step": 9455 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005645024197828777, - "loss": 3.7613, - "step": 9460 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005644635924424888, - "loss": 3.7864, - "step": 9465 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005644247452157932, - "loss": 3.8333, - "step": 9470 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005643858781057118, - "loss": 3.8384, - "step": 9475 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005643469911151673, - "loss": 3.8734, - "step": 9480 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005643080842470839, - "loss": 3.8705, - "step": 9485 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005642691575043872, - "loss": 3.7985, - "step": 9490 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005642302108900041, - "loss": 3.8142, - "step": 9495 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005641912444068634, - "loss": 3.8861, - "step": 9500 - }, - { - "epoch": 0.16, - "eval_loss": 3.8443562984466553, - "eval_runtime": 150.875, - "eval_samples_per_second": 12.202, - "eval_steps_per_second": 0.769, - "step": 9500 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005641522580578949, - "loss": 3.8243, - "step": 9505 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005641132518460305, - "loss": 3.817, - "step": 9510 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005640742257742031, - "loss": 3.7974, - "step": 9515 - }, - { - "epoch": 0.16, - "learning_rate": 0.0005640351798453472, - "loss": 3.8722, - "step": 9520 - }, - { - "epoch": 0.16, - "learning_rate": 0.000563996114062399, - "loss": 3.8327, - "step": 9525 - }, - { - "epoch": 0.16, - "learning_rate": 0.000563957028428296, - "loss": 3.7176, - "step": 9530 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005639179229459772, - "loss": 3.8109, - "step": 9535 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005638787976183831, - "loss": 3.7644, - "step": 9540 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005638396524484558, - "loss": 3.8023, - "step": 9545 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005638004874391389, - "loss": 3.7181, - "step": 9550 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005637613025933772, - "loss": 3.8218, - "step": 9555 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005637220979141174, - "loss": 3.821, - "step": 9560 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005636828734043072, - "loss": 3.7989, - "step": 9565 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005636436290668965, - "loss": 3.8229, - "step": 9570 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005636043649048359, - "loss": 3.7486, - "step": 9575 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005635650809210781, - "loss": 3.7537, - "step": 9580 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005635257771185768, - "loss": 3.8646, - "step": 9585 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005634864535002878, - "loss": 3.8951, - "step": 9590 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005634471100691678, - "loss": 3.8565, - "step": 9595 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005634077468281752, - "loss": 3.865, - "step": 9600 - }, - { - "epoch": 0.17, - "learning_rate": 0.00056336836378027, - "loss": 3.7137, - "step": 9605 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005633289609284135, - "loss": 3.875, - "step": 9610 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005632895382755689, - "loss": 3.7517, - "step": 9615 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005632500958247002, - "loss": 3.8388, - "step": 9620 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005632106335787734, - "loss": 3.7177, - "step": 9625 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005631711515407561, - "loss": 3.9332, - "step": 9630 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005631316497136167, - "loss": 3.7879, - "step": 9635 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005630921281003259, - "loss": 3.8254, - "step": 9640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005630525867038553, - "loss": 3.7538, - "step": 9645 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005630130255271784, - "loss": 3.8595, - "step": 9650 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005629734445732698, - "loss": 3.8395, - "step": 9655 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005629338438451059, - "loss": 3.8212, - "step": 9660 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005628942233456645, - "loss": 3.6969, - "step": 9665 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005628545830779248, - "loss": 3.8934, - "step": 9670 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005628149230448677, - "loss": 3.784, - "step": 9675 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005627752432494751, - "loss": 3.7458, - "step": 9680 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005627355436947309, - "loss": 3.7764, - "step": 9685 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005626958243836205, - "loss": 3.8557, - "step": 9690 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005626560853191302, - "loss": 3.7871, - "step": 9695 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005626163265042485, - "loss": 3.8723, - "step": 9700 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005625765479419649, - "loss": 3.7628, - "step": 9705 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005625367496352706, - "loss": 3.7876, - "step": 9710 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005624969315871581, - "loss": 3.8288, - "step": 9715 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005624570938006217, - "loss": 3.7054, - "step": 9720 - }, - { - "epoch": 0.17, - "learning_rate": 0.000562417236278657, - "loss": 3.8978, - "step": 9725 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005623773590242608, - "loss": 3.7314, - "step": 9730 - }, - { - "epoch": 0.17, - "learning_rate": 0.000562337462040432, - "loss": 3.844, - "step": 9735 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005622975453301706, - "loss": 3.8081, - "step": 9740 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005622576088964779, - "loss": 3.7213, - "step": 9745 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005622176527423571, - "loss": 3.7476, - "step": 9750 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005621776768708126, - "loss": 3.8561, - "step": 9755 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005621376812848505, - "loss": 3.8914, - "step": 9760 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005620976659874782, - "loss": 3.706, - "step": 9765 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005620576309817046, - "loss": 3.7814, - "step": 9770 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005620175762705402, - "loss": 3.7574, - "step": 9775 - }, - { - "epoch": 0.17, - "learning_rate": 0.000561977501856997, - "loss": 3.8686, - "step": 9780 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005619374077440882, - "loss": 3.8096, - "step": 9785 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005618972939348286, - "loss": 3.8072, - "step": 9790 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005618571604322349, - "loss": 3.8532, - "step": 9795 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005618170072393246, - "loss": 3.7328, - "step": 9800 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005617768343591172, - "loss": 3.8532, - "step": 9805 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005617366417946335, - "loss": 3.8652, - "step": 9810 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005616964295488957, - "loss": 3.7735, - "step": 9815 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005616561976249275, - "loss": 3.7548, - "step": 9820 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005616159460257542, - "loss": 3.764, - "step": 9825 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005615756747544026, - "loss": 3.7759, - "step": 9830 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005615353838139006, - "loss": 3.7218, - "step": 9835 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005614950732072784, - "loss": 3.8552, - "step": 9840 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005614547429375667, - "loss": 3.7065, - "step": 9845 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005614143930077982, - "loss": 3.8019, - "step": 9850 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005613740234210072, - "loss": 3.7688, - "step": 9855 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005613336341802291, - "loss": 3.7568, - "step": 9860 - }, - { - "epoch": 0.17, - "learning_rate": 0.000561293225288501, - "loss": 3.7632, - "step": 9865 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005612527967488614, - "loss": 3.6562, - "step": 9870 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005612123485643505, - "loss": 3.8351, - "step": 9875 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005611718807380096, - "loss": 3.8006, - "step": 9880 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005611313932728818, - "loss": 3.6692, - "step": 9885 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005610908861720114, - "loss": 3.7825, - "step": 9890 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005610503594384446, - "loss": 3.7926, - "step": 9895 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005610098130752286, - "loss": 3.8703, - "step": 9900 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005609692470854122, - "loss": 3.7963, - "step": 9905 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005609286614720459, - "loss": 3.8204, - "step": 9910 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005608880562381815, - "loss": 3.8013, - "step": 9915 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005608474313868724, - "loss": 3.7682, - "step": 9920 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005608067869211733, - "loss": 3.7958, - "step": 9925 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005607661228441402, - "loss": 3.7584, - "step": 9930 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005607254391588312, - "loss": 3.8284, - "step": 9935 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005606847358683054, - "loss": 3.7367, - "step": 9940 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005606440129756234, - "loss": 3.7325, - "step": 9945 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005606032704838475, - "loss": 3.6778, - "step": 9950 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005605625083960412, - "loss": 3.7351, - "step": 9955 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005605217267152696, - "loss": 3.812, - "step": 9960 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005604809254445992, - "loss": 3.7126, - "step": 9965 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005604401045870983, - "loss": 3.8023, - "step": 9970 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005603992641458362, - "loss": 3.7676, - "step": 9975 - }, - { - "epoch": 0.17, - "learning_rate": 0.000560358404123884, - "loss": 3.8314, - "step": 9980 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005603175245243141, - "loss": 3.7735, - "step": 9985 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005602766253502005, - "loss": 3.752, - "step": 9990 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005602357066046185, - "loss": 3.8366, - "step": 9995 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005601947682906452, - "loss": 3.7659, - "step": 10000 - }, - { - "epoch": 0.17, - "eval_loss": 3.824028253555298, - "eval_runtime": 150.1823, - "eval_samples_per_second": 12.258, - "eval_steps_per_second": 0.772, - "step": 10000 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005601538104113585, - "loss": 3.7734, - "step": 10005 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005601128329698388, - "loss": 3.7532, - "step": 10010 - }, - { - "epoch": 0.17, - "learning_rate": 0.000560071835969167, - "loss": 3.828, - "step": 10015 - }, - { - "epoch": 0.17, - "learning_rate": 0.000560030819412426, - "loss": 3.8327, - "step": 10020 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005599897833027, - "loss": 3.9457, - "step": 10025 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005599487276430749, - "loss": 3.7388, - "step": 10030 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005599076524366375, - "loss": 3.8464, - "step": 10035 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005598665576864768, - "loss": 3.7601, - "step": 10040 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005598254433956826, - "loss": 3.7211, - "step": 10045 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005597843095673466, - "loss": 3.7223, - "step": 10050 - }, - { - "epoch": 0.17, - "learning_rate": 0.000559743156204562, - "loss": 3.6862, - "step": 10055 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005597019833104232, - "loss": 3.8612, - "step": 10060 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005596607908880263, - "loss": 3.7786, - "step": 10065 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005596195789404685, - "loss": 3.9116, - "step": 10070 - }, - { - "epoch": 0.17, - "learning_rate": 0.000559578347470849, - "loss": 3.8694, - "step": 10075 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005595370964822681, - "loss": 3.7191, - "step": 10080 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005594958259778276, - "loss": 3.8221, - "step": 10085 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005594545359606308, - "loss": 3.8228, - "step": 10090 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005594132264337827, - "loss": 3.8175, - "step": 10095 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005593718974003895, - "loss": 3.7765, - "step": 10100 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005593305488635587, - "loss": 3.7567, - "step": 10105 - }, - { - "epoch": 0.17, - "learning_rate": 0.0005592891808263998, - "loss": 3.8263, - "step": 10110 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005592477932920232, - "loss": 3.664, - "step": 10115 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005592063862635414, - "loss": 3.873, - "step": 10120 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005591649597440675, - "loss": 3.7406, - "step": 10125 - }, - { - "epoch": 0.18, - "learning_rate": 0.000559123513736717, - "loss": 3.7542, - "step": 10130 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005590820482446063, - "loss": 3.8525, - "step": 10135 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005590405632708533, - "loss": 3.7978, - "step": 10140 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005589990588185773, - "loss": 3.7052, - "step": 10145 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005589575348908996, - "loss": 3.8114, - "step": 10150 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005589159914909425, - "loss": 3.7365, - "step": 10155 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005588744286218297, - "loss": 3.7698, - "step": 10160 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005588328462866864, - "loss": 3.7538, - "step": 10165 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005587912444886397, - "loss": 3.7885, - "step": 10170 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005587496232308177, - "loss": 3.761, - "step": 10175 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005587079825163502, - "loss": 3.7494, - "step": 10180 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005586663223483681, - "loss": 3.7903, - "step": 10185 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005586246427300043, - "loss": 3.7783, - "step": 10190 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005585829436643928, - "loss": 3.7313, - "step": 10195 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005585412251546691, - "loss": 3.8423, - "step": 10200 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005584994872039704, - "loss": 3.8002, - "step": 10205 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005584577298154349, - "loss": 3.7193, - "step": 10210 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005584159529922028, - "loss": 3.7501, - "step": 10215 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005583741567374153, - "loss": 3.8655, - "step": 10220 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005583323410542154, - "loss": 3.7713, - "step": 10225 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005582905059457473, - "loss": 3.7608, - "step": 10230 - }, - { - "epoch": 0.18, - "learning_rate": 0.000558248651415157, - "loss": 3.7848, - "step": 10235 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005582067774655916, - "loss": 3.8751, - "step": 10240 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005581648841001997, - "loss": 3.8157, - "step": 10245 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005581229713221318, - "loss": 3.7949, - "step": 10250 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005580810391345391, - "loss": 3.7647, - "step": 10255 - }, - { - "epoch": 0.18, - "learning_rate": 0.000558039087540575, - "loss": 3.7398, - "step": 10260 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005579971165433939, - "loss": 3.7206, - "step": 10265 - }, - { - "epoch": 0.18, - "learning_rate": 0.000557955126146152, - "loss": 3.9106, - "step": 10270 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005579131163520064, - "loss": 3.7303, - "step": 10275 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005578710871641165, - "loss": 3.6985, - "step": 10280 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005578290385856421, - "loss": 3.7783, - "step": 10285 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005577869706197456, - "loss": 3.7213, - "step": 10290 - }, - { - "epoch": 0.18, - "learning_rate": 0.00055774488326959, - "loss": 3.8223, - "step": 10295 - }, - { - "epoch": 0.18, - "learning_rate": 0.00055770277653834, - "loss": 3.8479, - "step": 10300 - }, - { - "epoch": 0.18, - "learning_rate": 0.000557660650429162, - "loss": 3.7619, - "step": 10305 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005576185049452235, - "loss": 3.7465, - "step": 10310 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005575763400896938, - "loss": 3.8019, - "step": 10315 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005575341558657433, - "loss": 3.7722, - "step": 10320 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005574919522765441, - "loss": 3.7072, - "step": 10325 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005574497293252698, - "loss": 3.8018, - "step": 10330 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005574074870150952, - "loss": 3.7442, - "step": 10335 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005573652253491968, - "loss": 3.7484, - "step": 10340 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005573229443307524, - "loss": 3.7656, - "step": 10345 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005572806439629414, - "loss": 3.7769, - "step": 10350 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005572383242489445, - "loss": 3.7266, - "step": 10355 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005571959851919439, - "loss": 3.8326, - "step": 10360 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005571536267951234, - "loss": 3.7286, - "step": 10365 - }, - { - "epoch": 0.18, - "learning_rate": 0.000557111249061668, - "loss": 3.7353, - "step": 10370 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005570688519947643, - "loss": 3.7861, - "step": 10375 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005570264355976005, - "loss": 3.746, - "step": 10380 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005569839998733659, - "loss": 3.8178, - "step": 10385 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005569415448252516, - "loss": 3.7118, - "step": 10390 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005568990704564498, - "loss": 3.8499, - "step": 10395 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005568565767701546, - "loss": 3.7467, - "step": 10400 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005568140637695611, - "loss": 3.7022, - "step": 10405 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005567715314578664, - "loss": 3.7888, - "step": 10410 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005567289798382682, - "loss": 3.661, - "step": 10415 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005566864089139665, - "loss": 3.7638, - "step": 10420 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005566438186881624, - "loss": 3.7379, - "step": 10425 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005566012091640585, - "loss": 3.7724, - "step": 10430 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005565585803448585, - "loss": 3.8414, - "step": 10435 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005565159322337682, - "loss": 3.7841, - "step": 10440 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005564732648339945, - "loss": 3.7352, - "step": 10445 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005564305781487456, - "loss": 3.796, - "step": 10450 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005563878721812314, - "loss": 3.681, - "step": 10455 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005563451469346632, - "loss": 3.6562, - "step": 10460 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005563024024122537, - "loss": 3.7886, - "step": 10465 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005562596386172172, - "loss": 3.7527, - "step": 10470 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005562168555527689, - "loss": 3.7763, - "step": 10475 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005561740532221265, - "loss": 3.7259, - "step": 10480 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005561312316285079, - "loss": 3.7692, - "step": 10485 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005560883907751334, - "loss": 3.753, - "step": 10490 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005560455306652243, - "loss": 3.7146, - "step": 10495 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005560026513020036, - "loss": 3.7457, - "step": 10500 - }, - { - "epoch": 0.18, - "eval_loss": 3.808900833129883, - "eval_runtime": 150.2733, - "eval_samples_per_second": 12.251, - "eval_steps_per_second": 0.772, - "step": 10500 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005559597526886955, - "loss": 3.7659, - "step": 10505 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005559168348285258, - "loss": 3.7797, - "step": 10510 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005558738977247216, - "loss": 3.7639, - "step": 10515 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005558309413805117, - "loss": 3.7582, - "step": 10520 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005557879657991261, - "loss": 3.7758, - "step": 10525 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005557449709837963, - "loss": 3.6406, - "step": 10530 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005557019569377555, - "loss": 3.7329, - "step": 10535 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005556589236642378, - "loss": 3.762, - "step": 10540 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005556158711664793, - "loss": 3.7226, - "step": 10545 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005555727994477175, - "loss": 3.7183, - "step": 10550 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005555297085111907, - "loss": 3.7314, - "step": 10555 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005554865983601396, - "loss": 3.829, - "step": 10560 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005554434689978056, - "loss": 3.7891, - "step": 10565 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005554003204274318, - "loss": 3.765, - "step": 10570 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005553571526522628, - "loss": 3.7701, - "step": 10575 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005553139656755446, - "loss": 3.7329, - "step": 10580 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005552707595005247, - "loss": 3.8377, - "step": 10585 - }, - { - "epoch": 0.18, - "learning_rate": 0.000555227534130452, - "loss": 3.6951, - "step": 10590 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005551842895685766, - "loss": 3.7439, - "step": 10595 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005551410258181506, - "loss": 3.764, - "step": 10600 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005550977428824269, - "loss": 3.7233, - "step": 10605 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005550544407646603, - "loss": 3.8349, - "step": 10610 - }, - { - "epoch": 0.18, - "learning_rate": 0.000555011119468107, - "loss": 3.606, - "step": 10615 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005549677789960242, - "loss": 3.7585, - "step": 10620 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005549244193516713, - "loss": 3.6967, - "step": 10625 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005548810405383084, - "loss": 3.7009, - "step": 10630 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005548376425591976, - "loss": 3.7488, - "step": 10635 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005547942254176021, - "loss": 3.8379, - "step": 10640 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005547507891167866, - "loss": 3.6371, - "step": 10645 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005547073336600174, - "loss": 3.7803, - "step": 10650 - }, - { - "epoch": 0.18, - "learning_rate": 0.000554663859050562, - "loss": 3.7893, - "step": 10655 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005546203652916895, - "loss": 3.7123, - "step": 10660 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005545768523866706, - "loss": 3.7559, - "step": 10665 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005545333203387768, - "loss": 3.7527, - "step": 10670 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005544897691512821, - "loss": 3.747, - "step": 10675 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005544461988274608, - "loss": 3.7641, - "step": 10680 - }, - { - "epoch": 0.18, - "learning_rate": 0.0005544026093705894, - "loss": 3.8162, - "step": 10685 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005543590007839455, - "loss": 3.8019, - "step": 10690 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005543153730708084, - "loss": 3.8212, - "step": 10695 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005542717262344586, - "loss": 3.6983, - "step": 10700 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005542280602781782, - "loss": 3.7003, - "step": 10705 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005541843752052504, - "loss": 3.8489, - "step": 10710 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005541406710189603, - "loss": 3.7563, - "step": 10715 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005540969477225942, - "loss": 3.8294, - "step": 10720 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005540532053194399, - "loss": 3.786, - "step": 10725 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005540094438127865, - "loss": 3.7697, - "step": 10730 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005539656632059247, - "loss": 3.7351, - "step": 10735 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005539218635021466, - "loss": 3.8407, - "step": 10740 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005538780447047457, - "loss": 3.7196, - "step": 10745 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005538342068170169, - "loss": 3.6508, - "step": 10750 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005537903498422567, - "loss": 3.7522, - "step": 10755 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005537464737837628, - "loss": 3.6545, - "step": 10760 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005537025786448346, - "loss": 3.7622, - "step": 10765 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005536586644287726, - "loss": 3.7217, - "step": 10770 - }, - { - "epoch": 0.19, - "learning_rate": 0.000553614731138879, - "loss": 3.6806, - "step": 10775 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005535707787784574, - "loss": 3.8162, - "step": 10780 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005535268073508128, - "loss": 3.7956, - "step": 10785 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005534828168592515, - "loss": 3.7212, - "step": 10790 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005534388073070815, - "loss": 3.8734, - "step": 10795 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005533947786976121, - "loss": 3.7601, - "step": 10800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005533507310341539, - "loss": 3.7531, - "step": 10805 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005533066643200191, - "loss": 3.8357, - "step": 10810 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005532625785585214, - "loss": 3.7786, - "step": 10815 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005532184737529757, - "loss": 3.8007, - "step": 10820 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005531743499066985, - "loss": 3.8569, - "step": 10825 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005531302070230077, - "loss": 3.729, - "step": 10830 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005530860451052226, - "loss": 3.7075, - "step": 10835 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005530418641566639, - "loss": 3.8074, - "step": 10840 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005529976641806539, - "loss": 3.7291, - "step": 10845 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005529534451805162, - "loss": 3.8529, - "step": 10850 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005529092071595756, - "loss": 3.7, - "step": 10855 - }, - { - "epoch": 0.19, - "learning_rate": 0.000552864950121159, - "loss": 3.65, - "step": 10860 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005528206740685938, - "loss": 3.729, - "step": 10865 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005527763790052099, - "loss": 3.822, - "step": 10870 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005527320649343375, - "loss": 3.7941, - "step": 10875 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005526877318593092, - "loss": 3.7147, - "step": 10880 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005526433797834584, - "loss": 3.6646, - "step": 10885 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005525990087101202, - "loss": 3.827, - "step": 10890 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005525546186426311, - "loss": 3.7701, - "step": 10895 - }, - { - "epoch": 0.19, - "learning_rate": 0.000552510209584329, - "loss": 3.6971, - "step": 10900 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005524657815385532, - "loss": 3.712, - "step": 10905 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005524213345086446, - "loss": 3.7302, - "step": 10910 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005523768684979453, - "loss": 3.7991, - "step": 10915 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005523323835097988, - "loss": 3.8046, - "step": 10920 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005522878795475503, - "loss": 3.746, - "step": 10925 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005522433566145462, - "loss": 3.6934, - "step": 10930 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005521988147141343, - "loss": 3.7359, - "step": 10935 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005521542538496642, - "loss": 3.6942, - "step": 10940 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005521096740244864, - "loss": 3.6852, - "step": 10945 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005520650752419531, - "loss": 3.7642, - "step": 10950 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005520204575054182, - "loss": 3.7516, - "step": 10955 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005519758208182362, - "loss": 3.8863, - "step": 10960 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005519311651837639, - "loss": 3.8518, - "step": 10965 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005518864906053592, - "loss": 3.6166, - "step": 10970 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005518417970863812, - "loss": 3.6787, - "step": 10975 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005517970846301908, - "loss": 3.6608, - "step": 10980 - }, - { - "epoch": 0.19, - "learning_rate": 0.00055175235324015, - "loss": 3.7437, - "step": 10985 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005517076029196225, - "loss": 3.7547, - "step": 10990 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005516628336719732, - "loss": 3.8382, - "step": 10995 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005516180455005686, - "loss": 3.9065, - "step": 11000 - }, - { - "epoch": 0.19, - "eval_loss": 3.82816481590271, - "eval_runtime": 150.3797, - "eval_samples_per_second": 12.242, - "eval_steps_per_second": 0.771, - "step": 11000 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005515732384087765, - "loss": 3.8359, - "step": 11005 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005515284123999662, - "loss": 3.7338, - "step": 11010 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005514835674775083, - "loss": 3.6409, - "step": 11015 - }, - { - "epoch": 0.19, - "learning_rate": 0.000551438703644775, - "loss": 3.6348, - "step": 11020 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005513938209051396, - "loss": 3.8714, - "step": 11025 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005513489192619775, - "loss": 3.7836, - "step": 11030 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005513039987186647, - "loss": 3.6187, - "step": 11035 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005512590592785791, - "loss": 3.7686, - "step": 11040 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005512141009451, - "loss": 3.7887, - "step": 11045 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005511691237216078, - "loss": 3.7462, - "step": 11050 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005511241276114849, - "loss": 3.7589, - "step": 11055 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005510791126181145, - "loss": 3.6582, - "step": 11060 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005510340787448815, - "loss": 3.6191, - "step": 11065 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005509890259951725, - "loss": 3.7391, - "step": 11070 - }, - { - "epoch": 0.19, - "learning_rate": 0.000550943954372375, - "loss": 3.7873, - "step": 11075 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005508988638798781, - "loss": 3.58, - "step": 11080 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005508537545210726, - "loss": 3.7628, - "step": 11085 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005508086262993502, - "loss": 3.684, - "step": 11090 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005507634792181046, - "loss": 3.808, - "step": 11095 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005507183132807304, - "loss": 3.6461, - "step": 11100 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005506731284906242, - "loss": 3.7431, - "step": 11105 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005506279248511832, - "loss": 3.6644, - "step": 11110 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005505827023658067, - "loss": 3.8145, - "step": 11115 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005505374610378953, - "loss": 3.7549, - "step": 11120 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005504922008708507, - "loss": 3.6777, - "step": 11125 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005504469218680763, - "loss": 3.6589, - "step": 11130 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005504016240329772, - "loss": 3.7951, - "step": 11135 - }, - { - "epoch": 0.19, - "learning_rate": 0.000550356307368959, - "loss": 3.6839, - "step": 11140 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005503109718794296, - "loss": 3.8131, - "step": 11145 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005502656175677979, - "loss": 3.7538, - "step": 11150 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005502202444374744, - "loss": 3.5535, - "step": 11155 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005501748524918709, - "loss": 3.6548, - "step": 11160 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005501294417344006, - "loss": 3.7523, - "step": 11165 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005500840121684781, - "loss": 3.778, - "step": 11170 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005500385637975196, - "loss": 3.6966, - "step": 11175 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005499930966249426, - "loss": 3.7497, - "step": 11180 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005499476106541659, - "loss": 3.7591, - "step": 11185 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005499021058886096, - "loss": 3.7704, - "step": 11190 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005498565823316959, - "loss": 3.8197, - "step": 11195 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005498110399868477, - "loss": 3.6668, - "step": 11200 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005497654788574896, - "loss": 3.7984, - "step": 11205 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005497198989470474, - "loss": 3.7465, - "step": 11210 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005496743002589487, - "loss": 3.6981, - "step": 11215 - }, - { - "epoch": 0.19, - "learning_rate": 0.000549628682796622, - "loss": 3.8666, - "step": 11220 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005495830465634978, - "loss": 3.7241, - "step": 11225 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005495373915630076, - "loss": 3.7521, - "step": 11230 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005494917177985844, - "loss": 3.717, - "step": 11235 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005494460252736627, - "loss": 3.8369, - "step": 11240 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005494003139916782, - "loss": 3.6984, - "step": 11245 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005493545839560682, - "loss": 3.7683, - "step": 11250 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005493088351702716, - "loss": 3.753, - "step": 11255 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005492630676377281, - "loss": 3.6862, - "step": 11260 - }, - { - "epoch": 0.19, - "learning_rate": 0.0005492172813618795, - "loss": 3.7831, - "step": 11265 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005491714763461684, - "loss": 3.6696, - "step": 11270 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005491256525940395, - "loss": 3.792, - "step": 11275 - }, - { - "epoch": 0.2, - "learning_rate": 0.000549079810108938, - "loss": 3.7118, - "step": 11280 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005490339488943115, - "loss": 3.7159, - "step": 11285 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005489880689536083, - "loss": 3.8581, - "step": 11290 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005489421702902783, - "loss": 3.7271, - "step": 11295 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005488962529077728, - "loss": 3.7899, - "step": 11300 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005488503168095449, - "loss": 3.7817, - "step": 11305 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005488043619990483, - "loss": 3.7262, - "step": 11310 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005487583884797389, - "loss": 3.7328, - "step": 11315 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005487123962550735, - "loss": 3.8268, - "step": 11320 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005486663853285106, - "loss": 3.6743, - "step": 11325 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005486203557035098, - "loss": 3.7256, - "step": 11330 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005485743073835325, - "loss": 3.6811, - "step": 11335 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005485282403720411, - "loss": 3.6535, - "step": 11340 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005484821546724998, - "loss": 3.7042, - "step": 11345 - }, - { - "epoch": 0.2, - "learning_rate": 0.000548436050288374, - "loss": 3.7035, - "step": 11350 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005483899272231303, - "loss": 3.6523, - "step": 11355 - }, - { - "epoch": 0.2, - "learning_rate": 0.000548343785480237, - "loss": 3.6676, - "step": 11360 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005482976250631639, - "loss": 3.7115, - "step": 11365 - }, - { - "epoch": 0.2, - "learning_rate": 0.000548251445975382, - "loss": 3.7758, - "step": 11370 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005482052482203634, - "loss": 3.7202, - "step": 11375 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005481590318015823, - "loss": 3.7921, - "step": 11380 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005481127967225137, - "loss": 3.5875, - "step": 11385 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005480665429866344, - "loss": 3.872, - "step": 11390 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005480202705974225, - "loss": 3.7499, - "step": 11395 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005479739795583573, - "loss": 3.8315, - "step": 11400 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005479276698729195, - "loss": 3.7723, - "step": 11405 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005478813415445916, - "loss": 3.7126, - "step": 11410 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005478349945768574, - "loss": 3.8067, - "step": 11415 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005477886289732015, - "loss": 3.6816, - "step": 11420 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005477422447371107, - "loss": 3.8123, - "step": 11425 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005476958418720728, - "loss": 3.6074, - "step": 11430 - }, - { - "epoch": 0.2, - "learning_rate": 0.000547649420381577, - "loss": 3.7801, - "step": 11435 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005476029802691138, - "loss": 3.7547, - "step": 11440 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005475565215381755, - "loss": 3.7165, - "step": 11445 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005475100441922556, - "loss": 3.721, - "step": 11450 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005474635482348488, - "loss": 3.6334, - "step": 11455 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005474170336694514, - "loss": 3.7961, - "step": 11460 - }, - { - "epoch": 0.2, - "learning_rate": 0.000547370500499561, - "loss": 3.7415, - "step": 11465 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005473239487286767, - "loss": 3.6964, - "step": 11470 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005472773783602991, - "loss": 3.7686, - "step": 11475 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005472307893979298, - "loss": 3.7813, - "step": 11480 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005471841818450722, - "loss": 3.8297, - "step": 11485 - }, - { - "epoch": 0.2, - "learning_rate": 0.000547137555705231, - "loss": 3.6398, - "step": 11490 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005470909109819122, - "loss": 3.8471, - "step": 11495 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005470442476786231, - "loss": 3.636, - "step": 11500 - }, - { - "epoch": 0.2, - "eval_loss": 3.7674803733825684, - "eval_runtime": 150.2732, - "eval_samples_per_second": 12.251, - "eval_steps_per_second": 0.772, - "step": 11500 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005469975657988727, - "loss": 3.7453, - "step": 11505 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005469508653461713, - "loss": 3.6952, - "step": 11510 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005469041463240302, - "loss": 3.6867, - "step": 11515 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005468574087359628, - "loss": 3.7732, - "step": 11520 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005468106525854834, - "loss": 3.7456, - "step": 11525 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005467638778761078, - "loss": 3.7178, - "step": 11530 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005467170846113532, - "loss": 3.6693, - "step": 11535 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005466702727947383, - "loss": 3.7435, - "step": 11540 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005466234424297829, - "loss": 3.7004, - "step": 11545 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005465765935200086, - "loss": 3.8126, - "step": 11550 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005465297260689383, - "loss": 3.7205, - "step": 11555 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005464828400800959, - "loss": 3.7958, - "step": 11560 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005464359355570071, - "loss": 3.745, - "step": 11565 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005463890125031991, - "loss": 3.681, - "step": 11570 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005463420709222, - "loss": 3.6648, - "step": 11575 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005462951108175396, - "loss": 3.7188, - "step": 11580 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005462481321927492, - "loss": 3.6209, - "step": 11585 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005462011350513611, - "loss": 3.8103, - "step": 11590 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005461541193969095, - "loss": 3.7334, - "step": 11595 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005461070852329296, - "loss": 3.8183, - "step": 11600 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005460600325629582, - "loss": 3.7361, - "step": 11605 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005460129613905334, - "loss": 3.6082, - "step": 11610 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005459658717191948, - "loss": 3.7858, - "step": 11615 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005459187635524831, - "loss": 3.7071, - "step": 11620 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005458716368939407, - "loss": 3.8105, - "step": 11625 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005458244917471112, - "loss": 3.7418, - "step": 11630 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005457773281155398, - "loss": 3.7896, - "step": 11635 - }, - { - "epoch": 0.2, - "learning_rate": 0.000545730146002773, - "loss": 3.7541, - "step": 11640 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005456829454123585, - "loss": 3.7075, - "step": 11645 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005456357263478456, - "loss": 3.7605, - "step": 11650 - }, - { - "epoch": 0.2, - "learning_rate": 0.000545588488812785, - "loss": 3.7757, - "step": 11655 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005455412328107287, - "loss": 3.7652, - "step": 11660 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005454939583452299, - "loss": 3.7985, - "step": 11665 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005454466654198437, - "loss": 3.7654, - "step": 11670 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005453993540381261, - "loss": 3.6536, - "step": 11675 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005453520242036348, - "loss": 3.7901, - "step": 11680 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005453046759199286, - "loss": 3.6965, - "step": 11685 - }, - { - "epoch": 0.2, - "learning_rate": 0.000545257309190568, - "loss": 3.7089, - "step": 11690 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005452099240191146, - "loss": 3.7599, - "step": 11695 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005451625204091317, - "loss": 3.6116, - "step": 11700 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005451150983641835, - "loss": 3.7526, - "step": 11705 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005450676578878362, - "loss": 3.7258, - "step": 11710 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005450201989836571, - "loss": 3.7765, - "step": 11715 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005449727216552147, - "loss": 3.7091, - "step": 11720 - }, - { - "epoch": 0.2, - "learning_rate": 0.000544925225906079, - "loss": 3.6662, - "step": 11725 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005448777117398216, - "loss": 3.6813, - "step": 11730 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005448301791600152, - "loss": 3.6477, - "step": 11735 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005447826281702342, - "loss": 3.6587, - "step": 11740 - }, - { - "epoch": 0.2, - "learning_rate": 0.000544735058774054, - "loss": 3.7099, - "step": 11745 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005446874709750516, - "loss": 3.7078, - "step": 11750 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005446398647768053, - "loss": 3.7051, - "step": 11755 - }, - { - "epoch": 0.2, - "learning_rate": 0.000544592240182895, - "loss": 3.7258, - "step": 11760 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005445445971969017, - "loss": 3.6229, - "step": 11765 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005444969358224082, - "loss": 3.8427, - "step": 11770 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005444492560629979, - "loss": 3.6114, - "step": 11775 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005444015579222564, - "loss": 3.7371, - "step": 11780 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005443538414037702, - "loss": 3.704, - "step": 11785 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005443061065111275, - "loss": 3.7573, - "step": 11790 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005442583532479177, - "loss": 3.7781, - "step": 11795 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005442105816177315, - "loss": 3.7478, - "step": 11800 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005441627916241611, - "loss": 3.6679, - "step": 11805 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005441149832708, - "loss": 3.5633, - "step": 11810 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005440671565612434, - "loss": 3.6967, - "step": 11815 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005440193114990873, - "loss": 3.6898, - "step": 11820 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005439714480879297, - "loss": 3.6566, - "step": 11825 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005439235663313695, - "loss": 3.6392, - "step": 11830 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005438756662330071, - "loss": 3.7343, - "step": 11835 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005438277477964445, - "loss": 3.7082, - "step": 11840 - }, - { - "epoch": 0.2, - "learning_rate": 0.0005437798110252849, - "loss": 3.6861, - "step": 11845 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005437318559231328, - "loss": 3.8254, - "step": 11850 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005436838824935943, - "loss": 3.7302, - "step": 11855 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005436358907402767, - "loss": 3.7656, - "step": 11860 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005435878806667886, - "loss": 3.7166, - "step": 11865 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005435398522767404, - "loss": 3.6894, - "step": 11870 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005434918055737434, - "loss": 3.7122, - "step": 11875 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005434437405614104, - "loss": 3.7014, - "step": 11880 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005433956572433558, - "loss": 3.7079, - "step": 11885 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005433475556231951, - "loss": 3.6731, - "step": 11890 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005432994357045455, - "loss": 3.7536, - "step": 11895 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005432512974910252, - "loss": 3.58, - "step": 11900 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005432031409862538, - "loss": 3.7737, - "step": 11905 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005431549661938527, - "loss": 3.6355, - "step": 11910 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005431067731174443, - "loss": 3.7166, - "step": 11915 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005430585617606524, - "loss": 3.7184, - "step": 11920 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005430103321271025, - "loss": 3.7067, - "step": 11925 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005429620842204209, - "loss": 3.8456, - "step": 11930 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005429138180442358, - "loss": 3.6251, - "step": 11935 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005428655336021763, - "loss": 3.7128, - "step": 11940 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005428172308978735, - "loss": 3.7401, - "step": 11945 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005427689099349593, - "loss": 3.7788, - "step": 11950 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005427205707170674, - "loss": 3.759, - "step": 11955 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005426722132478323, - "loss": 3.78, - "step": 11960 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005426238375308906, - "loss": 3.7712, - "step": 11965 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005425754435698796, - "loss": 3.7476, - "step": 11970 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005425270313684385, - "loss": 3.7033, - "step": 11975 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005424786009302075, - "loss": 3.6609, - "step": 11980 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005424301522588285, - "loss": 3.7616, - "step": 11985 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005423816853579444, - "loss": 3.6577, - "step": 11990 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005423332002311998, - "loss": 3.7224, - "step": 11995 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005422846968822404, - "loss": 3.6866, - "step": 12000 - }, - { - "epoch": 0.21, - "eval_loss": 3.7599782943725586, - "eval_runtime": 150.9725, - "eval_samples_per_second": 12.194, - "eval_steps_per_second": 0.768, - "step": 12000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005422361753147136, - "loss": 3.7354, - "step": 12005 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005421876355322677, - "loss": 3.7213, - "step": 12010 - }, - { - "epoch": 0.21, - "learning_rate": 0.000542139077538553, - "loss": 3.7977, - "step": 12015 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005420905013372205, - "loss": 3.786, - "step": 12020 - }, - { - "epoch": 0.21, - "learning_rate": 0.000542041906931923, - "loss": 3.7917, - "step": 12025 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005419932943263146, - "loss": 3.7851, - "step": 12030 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005419446635240505, - "loss": 3.7055, - "step": 12035 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005418960145287878, - "loss": 3.7075, - "step": 12040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005418473473441845, - "loss": 3.77, - "step": 12045 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005417986619739001, - "loss": 3.6612, - "step": 12050 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005417499584215955, - "loss": 3.5958, - "step": 12055 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005417012366909329, - "loss": 3.7396, - "step": 12060 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005416524967855761, - "loss": 3.6948, - "step": 12065 - }, - { - "epoch": 0.21, - "learning_rate": 0.00054160373870919, - "loss": 3.7575, - "step": 12070 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005415549624654409, - "loss": 3.7255, - "step": 12075 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005415061680579966, - "loss": 3.751, - "step": 12080 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005414573554905262, - "loss": 3.7404, - "step": 12085 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005414085247667002, - "loss": 3.8004, - "step": 12090 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005413596758901901, - "loss": 3.649, - "step": 12095 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005413108088646695, - "loss": 3.7088, - "step": 12100 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005412619236938126, - "loss": 3.6278, - "step": 12105 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005412130203812957, - "loss": 3.7007, - "step": 12110 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005411640989307958, - "loss": 3.7458, - "step": 12115 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005411151593459915, - "loss": 3.7283, - "step": 12120 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005410662016305629, - "loss": 3.7408, - "step": 12125 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005410172257881915, - "loss": 3.7432, - "step": 12130 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005409682318225599, - "loss": 3.7822, - "step": 12135 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005409192197373522, - "loss": 3.7222, - "step": 12140 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005408701895362538, - "loss": 3.741, - "step": 12145 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005408211412229517, - "loss": 3.5775, - "step": 12150 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005407720748011338, - "loss": 3.7984, - "step": 12155 - }, - { - "epoch": 0.21, - "learning_rate": 0.00054072299027449, - "loss": 3.752, - "step": 12160 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005406738876467109, - "loss": 3.706, - "step": 12165 - }, - { - "epoch": 0.21, - "learning_rate": 0.000540624766921489, - "loss": 3.7555, - "step": 12170 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005405756281025176, - "loss": 3.7497, - "step": 12175 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005405264711934922, - "loss": 3.6543, - "step": 12180 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005404772961981086, - "loss": 3.6224, - "step": 12185 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005404281031200648, - "loss": 3.6435, - "step": 12190 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005403788919630598, - "loss": 3.7747, - "step": 12195 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005403296627307942, - "loss": 3.5874, - "step": 12200 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005402804154269695, - "loss": 3.5728, - "step": 12205 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005402311500552889, - "loss": 3.7743, - "step": 12210 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005401818666194571, - "loss": 3.6319, - "step": 12215 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005401325651231798, - "loss": 3.6402, - "step": 12220 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005400832455701644, - "loss": 3.6527, - "step": 12225 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005400339079641192, - "loss": 3.7487, - "step": 12230 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005399845523087543, - "loss": 3.6764, - "step": 12235 - }, - { - "epoch": 0.21, - "learning_rate": 0.000539935178607781, - "loss": 3.756, - "step": 12240 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005398857868649118, - "loss": 3.5799, - "step": 12245 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005398363770838609, - "loss": 3.6562, - "step": 12250 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005397869492683436, - "loss": 3.6615, - "step": 12255 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005397375034220767, - "loss": 3.7017, - "step": 12260 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005396880395487781, - "loss": 3.7083, - "step": 12265 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005396385576521672, - "loss": 3.7206, - "step": 12270 - }, - { - "epoch": 0.21, - "learning_rate": 0.000539589057735965, - "loss": 3.6814, - "step": 12275 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005395395398038935, - "loss": 3.5883, - "step": 12280 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005394900038596763, - "loss": 3.6049, - "step": 12285 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005394404499070381, - "loss": 3.7757, - "step": 12290 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005393908779497052, - "loss": 3.7323, - "step": 12295 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005393412879914051, - "loss": 3.7704, - "step": 12300 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005392916800358667, - "loss": 3.7201, - "step": 12305 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005392420540868205, - "loss": 3.7927, - "step": 12310 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005391924101479978, - "loss": 3.656, - "step": 12315 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005391427482231317, - "loss": 3.7216, - "step": 12320 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005390930683159566, - "loss": 3.7133, - "step": 12325 - }, - { - "epoch": 0.21, - "learning_rate": 0.000539043370430208, - "loss": 3.6312, - "step": 12330 - }, - { - "epoch": 0.21, - "learning_rate": 0.000538993654569623, - "loss": 3.643, - "step": 12335 - }, - { - "epoch": 0.21, - "learning_rate": 0.00053894392073794, - "loss": 3.5749, - "step": 12340 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005388941689388988, - "loss": 3.5516, - "step": 12345 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005388443991762405, - "loss": 3.7479, - "step": 12350 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005387946114537072, - "loss": 3.6775, - "step": 12355 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005387448057750431, - "loss": 3.6478, - "step": 12360 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005386949821439932, - "loss": 3.7257, - "step": 12365 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005386451405643038, - "loss": 3.6918, - "step": 12370 - }, - { - "epoch": 0.21, - "learning_rate": 0.000538595281039723, - "loss": 3.6817, - "step": 12375 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005385454035739996, - "loss": 3.6628, - "step": 12380 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005384955081708847, - "loss": 3.6527, - "step": 12385 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005384455948341296, - "loss": 3.7236, - "step": 12390 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005383956635674878, - "loss": 3.5499, - "step": 12395 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005383457143747141, - "loss": 3.7324, - "step": 12400 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005382957472595638, - "loss": 3.6974, - "step": 12405 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005382457622257949, - "loss": 3.6924, - "step": 12410 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005381957592771654, - "loss": 3.5955, - "step": 12415 - }, - { - "epoch": 0.21, - "learning_rate": 0.0005381457384174358, - "loss": 3.6657, - "step": 12420 - }, - { - "epoch": 0.22, - "learning_rate": 0.000538095699650367, - "loss": 3.7176, - "step": 12425 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005380456429797218, - "loss": 3.6794, - "step": 12430 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005379955684092643, - "loss": 3.7586, - "step": 12435 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005379454759427597, - "loss": 3.7462, - "step": 12440 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005378953655839747, - "loss": 3.6993, - "step": 12445 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005378452373366776, - "loss": 3.6243, - "step": 12450 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005377950912046374, - "loss": 3.6997, - "step": 12455 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005377449271916251, - "loss": 3.6549, - "step": 12460 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005376947453014126, - "loss": 3.7064, - "step": 12465 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005376445455377736, - "loss": 3.6477, - "step": 12470 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005375943279044825, - "loss": 3.7082, - "step": 12475 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005375440924053156, - "loss": 3.6758, - "step": 12480 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005374938390440505, - "loss": 3.6828, - "step": 12485 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005374435678244658, - "loss": 3.6934, - "step": 12490 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005373932787503417, - "loss": 3.6921, - "step": 12495 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005373429718254595, - "loss": 3.6586, - "step": 12500 - }, - { - "epoch": 0.22, - "eval_loss": 3.7146527767181396, - "eval_runtime": 150.1722, - "eval_samples_per_second": 12.259, - "eval_steps_per_second": 0.772, - "step": 12500 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005372926470536023, - "loss": 3.759, - "step": 12505 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005372423044385541, - "loss": 3.6515, - "step": 12510 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005371919439841005, - "loss": 3.8031, - "step": 12515 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005371415656940283, - "loss": 3.6402, - "step": 12520 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005370911695721256, - "loss": 3.7482, - "step": 12525 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005370407556221821, - "loss": 3.7288, - "step": 12530 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005369903238479886, - "loss": 3.6749, - "step": 12535 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005369398742533373, - "loss": 3.6862, - "step": 12540 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005368894068420217, - "loss": 3.6132, - "step": 12545 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005368389216178368, - "loss": 3.666, - "step": 12550 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005367884185845787, - "loss": 3.5704, - "step": 12555 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005367378977460452, - "loss": 3.7454, - "step": 12560 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005366873591060349, - "loss": 3.7277, - "step": 12565 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005366368026683483, - "loss": 3.7656, - "step": 12570 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005365862284367869, - "loss": 3.8027, - "step": 12575 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005365356364151535, - "loss": 3.6515, - "step": 12580 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005364850266072525, - "loss": 3.6866, - "step": 12585 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005364343990168895, - "loss": 3.6147, - "step": 12590 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005363837536478714, - "loss": 3.6964, - "step": 12595 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005363330905040064, - "loss": 3.6638, - "step": 12600 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005362824095891043, - "loss": 3.6581, - "step": 12605 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005362317109069758, - "loss": 3.7257, - "step": 12610 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005361809944614334, - "loss": 3.6829, - "step": 12615 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005361302602562907, - "loss": 3.6478, - "step": 12620 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005360795082953624, - "loss": 3.6036, - "step": 12625 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005360287385824651, - "loss": 3.6498, - "step": 12630 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005359779511214162, - "loss": 3.749, - "step": 12635 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005359271459160347, - "loss": 3.6121, - "step": 12640 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005358763229701411, - "loss": 3.6755, - "step": 12645 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005358254822875567, - "loss": 3.6591, - "step": 12650 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005357746238721047, - "loss": 3.6631, - "step": 12655 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005357237477276093, - "loss": 3.6449, - "step": 12660 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005356728538578961, - "loss": 3.6601, - "step": 12665 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005356219422667922, - "loss": 3.6639, - "step": 12670 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005355710129581257, - "loss": 3.693, - "step": 12675 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005355200659357262, - "loss": 3.6938, - "step": 12680 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005354691012034249, - "loss": 3.715, - "step": 12685 - }, - { - "epoch": 0.22, - "learning_rate": 0.000535418118765054, - "loss": 3.521, - "step": 12690 - }, - { - "epoch": 0.22, - "learning_rate": 0.000535367118624447, - "loss": 3.692, - "step": 12695 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005353161007854389, - "loss": 3.6524, - "step": 12700 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005352650652518659, - "loss": 3.6092, - "step": 12705 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005352140120275659, - "loss": 3.712, - "step": 12710 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005351629411163776, - "loss": 3.7175, - "step": 12715 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005351118525221412, - "loss": 3.6494, - "step": 12720 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005350607462486985, - "loss": 3.7582, - "step": 12725 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005350096222998924, - "loss": 3.6472, - "step": 12730 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005349584806795671, - "loss": 3.6468, - "step": 12735 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005349073213915681, - "loss": 3.6471, - "step": 12740 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005348561444397425, - "loss": 3.7198, - "step": 12745 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005348049498279384, - "loss": 3.5422, - "step": 12750 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005347537375600055, - "loss": 3.6621, - "step": 12755 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005347025076397945, - "loss": 3.6559, - "step": 12760 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005346512600711579, - "loss": 3.6488, - "step": 12765 - }, - { - "epoch": 0.22, - "learning_rate": 0.000534599994857949, - "loss": 3.6929, - "step": 12770 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005345487120040229, - "loss": 3.605, - "step": 12775 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005344974115132356, - "loss": 3.7029, - "step": 12780 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005344460933894448, - "loss": 3.5666, - "step": 12785 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005343947576365093, - "loss": 3.6138, - "step": 12790 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005343434042582892, - "loss": 3.6309, - "step": 12795 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005342920332586462, - "loss": 3.6865, - "step": 12800 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005342406446414429, - "loss": 3.5163, - "step": 12805 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005341892384105436, - "loss": 3.7673, - "step": 12810 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005341378145698139, - "loss": 3.7516, - "step": 12815 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005340863731231204, - "loss": 3.6601, - "step": 12820 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005340349140743314, - "loss": 3.6487, - "step": 12825 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005339834374273162, - "loss": 3.594, - "step": 12830 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005339319431859455, - "loss": 3.6715, - "step": 12835 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005338804313540918, - "loss": 3.6531, - "step": 12840 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005338289019356283, - "loss": 3.6891, - "step": 12845 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005337773549344294, - "loss": 3.722, - "step": 12850 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005337257903543719, - "loss": 3.6754, - "step": 12855 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005336742081993325, - "loss": 3.723, - "step": 12860 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005336226084731903, - "loss": 3.5262, - "step": 12865 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005335709911798253, - "loss": 3.6648, - "step": 12870 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005335193563231188, - "loss": 3.755, - "step": 12875 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005334677039069534, - "loss": 3.6505, - "step": 12880 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005334160339352131, - "loss": 3.5853, - "step": 12885 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005333643464117834, - "loss": 3.6742, - "step": 12890 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005333126413405508, - "loss": 3.7451, - "step": 12895 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005332609187254033, - "loss": 3.5786, - "step": 12900 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005332091785702301, - "loss": 3.6852, - "step": 12905 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005331574208789219, - "loss": 3.7323, - "step": 12910 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005331056456553705, - "loss": 3.5534, - "step": 12915 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005330538529034692, - "loss": 3.6558, - "step": 12920 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005330020426271126, - "loss": 3.5388, - "step": 12925 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005329502148301965, - "loss": 3.7418, - "step": 12930 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005328983695166181, - "loss": 3.6543, - "step": 12935 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005328465066902759, - "loss": 3.6607, - "step": 12940 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005327946263550698, - "loss": 3.796, - "step": 12945 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005327427285149006, - "loss": 3.6999, - "step": 12950 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005326908131736712, - "loss": 3.5925, - "step": 12955 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005326388803352851, - "loss": 3.675, - "step": 12960 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005325869300036475, - "loss": 3.72, - "step": 12965 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005325349621826648, - "loss": 3.7205, - "step": 12970 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005324829768762445, - "loss": 3.6847, - "step": 12975 - }, - { - "epoch": 0.22, - "learning_rate": 0.000532430974088296, - "loss": 3.7384, - "step": 12980 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005323789538227293, - "loss": 3.6207, - "step": 12985 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005323269160834562, - "loss": 3.6368, - "step": 12990 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005322748608743897, - "loss": 3.5439, - "step": 12995 - }, - { - "epoch": 0.22, - "learning_rate": 0.0005322227881994441, - "loss": 3.6824, - "step": 13000 - }, - { - "epoch": 0.22, - "eval_loss": 3.6955907344818115, - "eval_runtime": 150.2757, - "eval_samples_per_second": 12.251, - "eval_steps_per_second": 0.772, - "step": 13000 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005321706980625349, - "loss": 3.6233, - "step": 13005 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005321185904675791, - "loss": 3.6698, - "step": 13010 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005320664654184947, - "loss": 3.693, - "step": 13015 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005320143229192016, - "loss": 3.6562, - "step": 13020 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005319621629736205, - "loss": 3.7259, - "step": 13025 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005319099855856734, - "loss": 3.6133, - "step": 13030 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005318577907592841, - "loss": 3.626, - "step": 13035 - }, - { - "epoch": 0.23, - "learning_rate": 0.000531805578498377, - "loss": 3.6176, - "step": 13040 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005317533488068784, - "loss": 3.7022, - "step": 13045 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005317011016887157, - "loss": 3.5367, - "step": 13050 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005316488371478177, - "loss": 3.6384, - "step": 13055 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005315965551881143, - "loss": 3.6755, - "step": 13060 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005315442558135368, - "loss": 3.7178, - "step": 13065 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005314919390280179, - "loss": 3.6934, - "step": 13070 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005314396048354916, - "loss": 3.634, - "step": 13075 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005313872532398932, - "loss": 3.5333, - "step": 13080 - }, - { - "epoch": 0.23, - "learning_rate": 0.000531334884245159, - "loss": 3.6866, - "step": 13085 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005312824978552272, - "loss": 3.6536, - "step": 13090 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005312300940740369, - "loss": 3.7084, - "step": 13095 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005311776729055286, - "loss": 3.637, - "step": 13100 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005311252343536439, - "loss": 3.7074, - "step": 13105 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005310727784223261, - "loss": 3.7197, - "step": 13110 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005310203051155198, - "loss": 3.6676, - "step": 13115 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005309678144371703, - "loss": 3.5821, - "step": 13120 - }, - { - "epoch": 0.23, - "learning_rate": 0.000530915306391225, - "loss": 3.6428, - "step": 13125 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005308627809816319, - "loss": 3.7832, - "step": 13130 - }, - { - "epoch": 0.23, - "learning_rate": 0.000530810238212341, - "loss": 3.6682, - "step": 13135 - }, - { - "epoch": 0.23, - "learning_rate": 0.000530757678087303, - "loss": 3.6524, - "step": 13140 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005307051006104701, - "loss": 3.6169, - "step": 13145 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005306525057857961, - "loss": 3.6496, - "step": 13150 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005305998936172357, - "loss": 3.6896, - "step": 13155 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005305472641087452, - "loss": 3.6579, - "step": 13160 - }, - { - "epoch": 0.23, - "learning_rate": 0.000530494617264282, - "loss": 3.7858, - "step": 13165 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005304419530878046, - "loss": 3.7619, - "step": 13170 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005303892715832736, - "loss": 3.784, - "step": 13175 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005303365727546499, - "loss": 3.7082, - "step": 13180 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005302838566058964, - "loss": 3.6019, - "step": 13185 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005302311231409773, - "loss": 3.641, - "step": 13190 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005301783723638574, - "loss": 3.6698, - "step": 13195 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005301256042785036, - "loss": 3.6359, - "step": 13200 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005300728188888838, - "loss": 3.7043, - "step": 13205 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005300200161989672, - "loss": 3.7608, - "step": 13210 - }, - { - "epoch": 0.23, - "learning_rate": 0.000529967196212724, - "loss": 3.6878, - "step": 13215 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005299143589341263, - "loss": 3.6146, - "step": 13220 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005298615043671472, - "loss": 3.7112, - "step": 13225 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005298086325157609, - "loss": 3.6229, - "step": 13230 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005297557433839432, - "loss": 3.6452, - "step": 13235 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005297028369756711, - "loss": 3.6943, - "step": 13240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005296499132949227, - "loss": 3.6575, - "step": 13245 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005295969723456781, - "loss": 3.5772, - "step": 13250 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005295440141319176, - "loss": 3.6112, - "step": 13255 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005294910386576237, - "loss": 3.7853, - "step": 13260 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005294380459267797, - "loss": 3.6779, - "step": 13265 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005293850359433706, - "loss": 3.7071, - "step": 13270 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005293320087113824, - "loss": 3.7668, - "step": 13275 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005292789642348024, - "loss": 3.7479, - "step": 13280 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005292259025176194, - "loss": 3.6091, - "step": 13285 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005291728235638232, - "loss": 3.6423, - "step": 13290 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005291197273774053, - "loss": 3.631, - "step": 13295 - }, - { - "epoch": 0.23, - "learning_rate": 0.000529066613962358, - "loss": 3.6896, - "step": 13300 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005290134833226752, - "loss": 3.726, - "step": 13305 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005289603354623523, - "loss": 3.6359, - "step": 13310 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005289071703853855, - "loss": 3.6416, - "step": 13315 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005288539880957727, - "loss": 3.7228, - "step": 13320 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005288007885975127, - "loss": 3.6365, - "step": 13325 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005287475718946061, - "loss": 3.7192, - "step": 13330 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005286943379910543, - "loss": 3.7571, - "step": 13335 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005286410868908604, - "loss": 3.6936, - "step": 13340 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005285878185980285, - "loss": 3.6531, - "step": 13345 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005285345331165641, - "loss": 3.6293, - "step": 13350 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005284812304504742, - "loss": 3.7047, - "step": 13355 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005284279106037666, - "loss": 3.6494, - "step": 13360 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005283745735804508, - "loss": 3.7135, - "step": 13365 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005283212193845374, - "loss": 3.6306, - "step": 13370 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005282678480200385, - "loss": 3.5678, - "step": 13375 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005282144594909674, - "loss": 3.6169, - "step": 13380 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005281610538013384, - "loss": 3.6236, - "step": 13385 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005281076309551674, - "loss": 3.6551, - "step": 13390 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005280541909564717, - "loss": 3.7224, - "step": 13395 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005280007338092696, - "loss": 3.5341, - "step": 13400 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005279472595175807, - "loss": 3.6211, - "step": 13405 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005278937680854262, - "loss": 3.5654, - "step": 13410 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005278402595168282, - "loss": 3.7164, - "step": 13415 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005277867338158104, - "loss": 3.7283, - "step": 13420 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005277331909863976, - "loss": 3.6716, - "step": 13425 - }, - { - "epoch": 0.23, - "learning_rate": 0.000527679631032616, - "loss": 3.7008, - "step": 13430 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005276260539584928, - "loss": 3.6196, - "step": 13435 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005275724597680571, - "loss": 3.6388, - "step": 13440 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005275188484653386, - "loss": 3.6706, - "step": 13445 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005274652200543688, - "loss": 3.6409, - "step": 13450 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005274115745391801, - "loss": 3.5821, - "step": 13455 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005273579119238065, - "loss": 3.6809, - "step": 13460 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005273042322122832, - "loss": 3.7153, - "step": 13465 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005272505354086465, - "loss": 3.6738, - "step": 13470 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005271968215169341, - "loss": 3.6512, - "step": 13475 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005271430905411851, - "loss": 3.6398, - "step": 13480 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005270893424854399, - "loss": 3.5679, - "step": 13485 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005270355773537398, - "loss": 3.6169, - "step": 13490 - }, - { - "epoch": 0.23, - "learning_rate": 0.000526981795150128, - "loss": 3.6797, - "step": 13495 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005269279958786484, - "loss": 3.565, - "step": 13500 - }, - { - "epoch": 0.23, - "eval_loss": 3.6858224868774414, - "eval_runtime": 149.7698, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 13500 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005268741795433464, - "loss": 3.7488, - "step": 13505 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005268203461482689, - "loss": 3.6822, - "step": 13510 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005267664956974638, - "loss": 3.5031, - "step": 13515 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005267126281949804, - "loss": 3.6234, - "step": 13520 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005266587436448692, - "loss": 3.6111, - "step": 13525 - }, - { - "epoch": 0.23, - "learning_rate": 0.000526604842051182, - "loss": 3.5746, - "step": 13530 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005265509234179722, - "loss": 3.6438, - "step": 13535 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005264969877492939, - "loss": 3.6441, - "step": 13540 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005264430350492029, - "loss": 3.5713, - "step": 13545 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005263890653217562, - "loss": 3.604, - "step": 13550 - }, - { - "epoch": 0.23, - "learning_rate": 0.000526335078571012, - "loss": 3.6513, - "step": 13555 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005262810748010298, - "loss": 3.6257, - "step": 13560 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005262270540158705, - "loss": 3.6804, - "step": 13565 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005261730162195962, - "loss": 3.7044, - "step": 13570 - }, - { - "epoch": 0.23, - "learning_rate": 0.0005261189614162701, - "loss": 3.6778, - "step": 13575 - }, - { - "epoch": 0.24, - "learning_rate": 0.000526064889609957, - "loss": 3.6329, - "step": 13580 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005260108008047227, - "loss": 3.6702, - "step": 13585 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005259566950046344, - "loss": 3.6102, - "step": 13590 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005259025722137608, - "loss": 3.5786, - "step": 13595 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005258484324361715, - "loss": 3.599, - "step": 13600 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005257942756759375, - "loss": 3.6527, - "step": 13605 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005257401019371312, - "loss": 3.7808, - "step": 13610 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005256859112238259, - "loss": 3.5285, - "step": 13615 - }, - { - "epoch": 0.24, - "learning_rate": 0.000525631703540097, - "loss": 3.6049, - "step": 13620 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005255774788900201, - "loss": 3.431, - "step": 13625 - }, - { - "epoch": 0.24, - "learning_rate": 0.000525523237277673, - "loss": 3.5476, - "step": 13630 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005254689787071342, - "loss": 3.5949, - "step": 13635 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005254147031824837, - "loss": 3.7066, - "step": 13640 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005253604107078027, - "loss": 3.6354, - "step": 13645 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005253061012871738, - "loss": 3.5766, - "step": 13650 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005252517749246809, - "loss": 3.7445, - "step": 13655 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005251974316244088, - "loss": 3.7481, - "step": 13660 - }, - { - "epoch": 0.24, - "learning_rate": 0.000525143071390444, - "loss": 3.521, - "step": 13665 - }, - { - "epoch": 0.24, - "learning_rate": 0.000525088694226874, - "loss": 3.7559, - "step": 13670 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005250343001377877, - "loss": 3.6996, - "step": 13675 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005249798891272755, - "loss": 3.6476, - "step": 13680 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005249254611994286, - "loss": 3.7529, - "step": 13685 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005248710163583396, - "loss": 3.7192, - "step": 13690 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005248165546081027, - "loss": 3.6813, - "step": 13695 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005247620759528131, - "loss": 3.6612, - "step": 13700 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005247075803965672, - "loss": 3.5626, - "step": 13705 - }, - { - "epoch": 0.24, - "learning_rate": 0.000524653067943463, - "loss": 3.6624, - "step": 13710 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005245985385975993, - "loss": 3.671, - "step": 13715 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005245439923630765, - "loss": 3.5639, - "step": 13720 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005244894292439963, - "loss": 3.5779, - "step": 13725 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005244348492444615, - "loss": 3.6789, - "step": 13730 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005243802523685763, - "loss": 3.6376, - "step": 13735 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005243256386204461, - "loss": 3.5759, - "step": 13740 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005242710080041774, - "loss": 3.6651, - "step": 13745 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005242163605238783, - "loss": 3.7021, - "step": 13750 - }, - { - "epoch": 0.24, - "learning_rate": 0.000524161696183658, - "loss": 3.5692, - "step": 13755 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005241070149876269, - "loss": 3.6352, - "step": 13760 - }, - { - "epoch": 0.24, - "learning_rate": 0.000524052316939897, - "loss": 3.799, - "step": 13765 - }, - { - "epoch": 0.24, - "learning_rate": 0.000523997602044581, - "loss": 3.6014, - "step": 13770 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005239428703057933, - "loss": 3.5783, - "step": 13775 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005238881217276493, - "loss": 3.6207, - "step": 13780 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005238333563142661, - "loss": 3.652, - "step": 13785 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005237785740697617, - "loss": 3.5884, - "step": 13790 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005237237749982552, - "loss": 3.6441, - "step": 13795 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005236689591038674, - "loss": 3.6444, - "step": 13800 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005236141263907203, - "loss": 3.5998, - "step": 13805 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005235592768629368, - "loss": 3.6522, - "step": 13810 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005235044105246415, - "loss": 3.6656, - "step": 13815 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005234495273799598, - "loss": 3.6451, - "step": 13820 - }, - { - "epoch": 0.24, - "learning_rate": 0.000523394627433019, - "loss": 3.7121, - "step": 13825 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005233397106879469, - "loss": 4.2102, - "step": 13830 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005232847771488733, - "loss": 4.3941, - "step": 13835 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005232298268199288, - "loss": 4.4938, - "step": 13840 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005231748597052452, - "loss": 4.4764, - "step": 13845 - }, - { - "epoch": 0.24, - "learning_rate": 0.000523119875808956, - "loss": 4.1145, - "step": 13850 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005230648751351956, - "loss": 3.8547, - "step": 13855 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005230098576880997, - "loss": 3.8067, - "step": 13860 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005229548234718056, - "loss": 3.6504, - "step": 13865 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005228997724904512, - "loss": 3.7026, - "step": 13870 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005228447047481763, - "loss": 3.7336, - "step": 13875 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005227896202491218, - "loss": 3.6148, - "step": 13880 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005227345189974295, - "loss": 3.7376, - "step": 13885 - }, - { - "epoch": 0.24, - "learning_rate": 0.000522679400997243, - "loss": 3.7209, - "step": 13890 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005226242662527066, - "loss": 3.7498, - "step": 13895 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005225691147679664, - "loss": 3.7111, - "step": 13900 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005225139465471694, - "loss": 3.6576, - "step": 13905 - }, - { - "epoch": 0.24, - "learning_rate": 0.000522458761594464, - "loss": 3.6185, - "step": 13910 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005224035599139997, - "loss": 3.641, - "step": 13915 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005223483415099276, - "loss": 3.704, - "step": 13920 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005222931063863998, - "loss": 3.6244, - "step": 13925 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005222378545475695, - "loss": 3.5334, - "step": 13930 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005221825859975915, - "loss": 3.651, - "step": 13935 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005221273007406218, - "loss": 3.5782, - "step": 13940 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005220719987808174, - "loss": 3.6259, - "step": 13945 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005220166801223369, - "loss": 3.6128, - "step": 13950 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005219613447693396, - "loss": 3.6453, - "step": 13955 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005219059927259869, - "loss": 3.7525, - "step": 13960 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005218506239964409, - "loss": 3.6357, - "step": 13965 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005217952385848648, - "loss": 3.6617, - "step": 13970 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005217398364954234, - "loss": 3.5647, - "step": 13975 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005216844177322828, - "loss": 3.6491, - "step": 13980 - }, - { - "epoch": 0.24, - "learning_rate": 0.00052162898229961, - "loss": 3.644, - "step": 13985 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005215735302015737, - "loss": 3.7162, - "step": 13990 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005215180614423434, - "loss": 3.5794, - "step": 13995 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005214625760260901, - "loss": 3.5723, - "step": 14000 - }, - { - "epoch": 0.24, - "eval_loss": 3.667832851409912, - "eval_runtime": 149.7768, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.774, - "step": 14000 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005214070739569861, - "loss": 3.5753, - "step": 14005 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005213515552392048, - "loss": 3.6657, - "step": 14010 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005212960198769209, - "loss": 3.6619, - "step": 14015 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005212404678743105, - "loss": 3.6746, - "step": 14020 - }, - { - "epoch": 0.24, - "learning_rate": 0.000521184899235551, - "loss": 3.6758, - "step": 14025 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005211293139648203, - "loss": 3.5972, - "step": 14030 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005210737120662985, - "loss": 3.5256, - "step": 14035 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005210180935441667, - "loss": 3.7113, - "step": 14040 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005209624584026068, - "loss": 3.5753, - "step": 14045 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005209068066458026, - "loss": 3.6638, - "step": 14050 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005208511382779385, - "loss": 3.6401, - "step": 14055 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005207954533032008, - "loss": 3.659, - "step": 14060 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005207397517257765, - "loss": 3.7119, - "step": 14065 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005206840335498541, - "loss": 3.6507, - "step": 14070 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005206282987796234, - "loss": 3.5722, - "step": 14075 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005205725474192753, - "loss": 3.6486, - "step": 14080 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005205167794730022, - "loss": 3.772, - "step": 14085 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005204609949449972, - "loss": 3.6892, - "step": 14090 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005204051938394554, - "loss": 3.7151, - "step": 14095 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005203493761605724, - "loss": 3.6335, - "step": 14100 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005202935419125458, - "loss": 3.68, - "step": 14105 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005202376910995736, - "loss": 3.6781, - "step": 14110 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005201818237258558, - "loss": 3.7159, - "step": 14115 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005201259397955934, - "loss": 3.6356, - "step": 14120 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005200700393129884, - "loss": 3.5929, - "step": 14125 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005200141222822443, - "loss": 3.702, - "step": 14130 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005199581887075657, - "loss": 3.572, - "step": 14135 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005199022385931585, - "loss": 3.5301, - "step": 14140 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005198462719432301, - "loss": 3.7308, - "step": 14145 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005197902887619887, - "loss": 3.6635, - "step": 14150 - }, - { - "epoch": 0.24, - "learning_rate": 0.0005197342890536441, - "loss": 3.6117, - "step": 14155 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005196782728224069, - "loss": 3.6756, - "step": 14160 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005196222400724894, - "loss": 3.6662, - "step": 14165 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005195661908081052, - "loss": 3.5442, - "step": 14170 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005195101250334684, - "loss": 3.6221, - "step": 14175 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005194540427527955, - "loss": 3.7254, - "step": 14180 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005193979439703031, - "loss": 3.6972, - "step": 14185 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005193418286902098, - "loss": 3.5639, - "step": 14190 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005192856969167349, - "loss": 3.7267, - "step": 14195 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005192295486540995, - "loss": 3.6168, - "step": 14200 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005191733839065257, - "loss": 3.5785, - "step": 14205 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005191172026782365, - "loss": 3.6425, - "step": 14210 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005190610049734568, - "loss": 3.6147, - "step": 14215 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005190047907964121, - "loss": 3.6009, - "step": 14220 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005189485601513295, - "loss": 3.6146, - "step": 14225 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005188923130424372, - "loss": 3.6774, - "step": 14230 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005188360494739649, - "loss": 3.6255, - "step": 14235 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005187797694501431, - "loss": 3.4911, - "step": 14240 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005187234729752039, - "loss": 3.5676, - "step": 14245 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005186671600533805, - "loss": 3.6448, - "step": 14250 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005186108306889073, - "loss": 3.5538, - "step": 14255 - }, - { - "epoch": 0.25, - "learning_rate": 0.00051855448488602, - "loss": 3.4816, - "step": 14260 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005184981226489555, - "loss": 3.5797, - "step": 14265 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005184417439819519, - "loss": 3.4676, - "step": 14270 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005183853488892488, - "loss": 3.6309, - "step": 14275 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005183289373750866, - "loss": 3.5128, - "step": 14280 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005182725094437072, - "loss": 3.6644, - "step": 14285 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005182160650993537, - "loss": 3.6486, - "step": 14290 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005181596043462704, - "loss": 3.6108, - "step": 14295 - }, - { - "epoch": 0.25, - "learning_rate": 0.000518103127188703, - "loss": 3.5354, - "step": 14300 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005180466336308983, - "loss": 3.5014, - "step": 14305 - }, - { - "epoch": 0.25, - "learning_rate": 0.000517990123677104, - "loss": 3.416, - "step": 14310 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005179335973315697, - "loss": 3.5127, - "step": 14315 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005178770545985458, - "loss": 3.6262, - "step": 14320 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005178204954822839, - "loss": 3.6022, - "step": 14325 - }, - { - "epoch": 0.25, - "learning_rate": 0.000517763919987037, - "loss": 3.6538, - "step": 14330 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005177073281170595, - "loss": 3.6462, - "step": 14335 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005176507198766066, - "loss": 3.5705, - "step": 14340 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005175940952699351, - "loss": 3.5812, - "step": 14345 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005175374543013025, - "loss": 3.6557, - "step": 14350 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005174807969749685, - "loss": 3.6208, - "step": 14355 - }, - { - "epoch": 0.25, - "learning_rate": 0.000517424123295193, - "loss": 3.5958, - "step": 14360 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005173674332662377, - "loss": 3.5134, - "step": 14365 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005173107268923655, - "loss": 3.477, - "step": 14370 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005172540041778403, - "loss": 3.5057, - "step": 14375 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005171972651269273, - "loss": 3.6122, - "step": 14380 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005171405097438933, - "loss": 3.6689, - "step": 14385 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005170837380330056, - "loss": 3.5244, - "step": 14390 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005170269499985334, - "loss": 3.6451, - "step": 14395 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005169701456447468, - "loss": 3.6317, - "step": 14400 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005169133249759171, - "loss": 3.5989, - "step": 14405 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005168564879963172, - "loss": 3.6657, - "step": 14410 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005167996347102207, - "loss": 3.6195, - "step": 14415 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005167427651219027, - "loss": 3.6496, - "step": 14420 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005166858792356396, - "loss": 3.719, - "step": 14425 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005166289770557089, - "loss": 3.5693, - "step": 14430 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005165720585863893, - "loss": 3.5917, - "step": 14435 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005165151238319608, - "loss": 3.5493, - "step": 14440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005164581727967046, - "loss": 3.5514, - "step": 14445 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005164012054849032, - "loss": 3.5368, - "step": 14450 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005163442219008402, - "loss": 3.5879, - "step": 14455 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005162872220488004, - "loss": 3.6387, - "step": 14460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00051623020593307, - "loss": 3.5715, - "step": 14465 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005161731735579361, - "loss": 3.6949, - "step": 14470 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005161161249276876, - "loss": 3.6781, - "step": 14475 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005160590600466141, - "loss": 3.537, - "step": 14480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005160019789190065, - "loss": 3.664, - "step": 14485 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005159448815491571, - "loss": 3.6726, - "step": 14490 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005158877679413593, - "loss": 3.6492, - "step": 14495 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005158306380999078, - "loss": 3.5562, - "step": 14500 - }, - { - "epoch": 0.25, - "eval_loss": 3.648855447769165, - "eval_runtime": 149.8786, - "eval_samples_per_second": 12.283, - "eval_steps_per_second": 0.774, - "step": 14500 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005157734920290984, - "loss": 3.634, - "step": 14505 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005157163297332282, - "loss": 3.5898, - "step": 14510 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005156591512165955, - "loss": 3.6621, - "step": 14515 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005156019564834999, - "loss": 3.6402, - "step": 14520 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005155447455382421, - "loss": 3.6102, - "step": 14525 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005154875183851241, - "loss": 3.624, - "step": 14530 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005154302750284491, - "loss": 3.6164, - "step": 14535 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005153730154725214, - "loss": 3.4685, - "step": 14540 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005153157397216468, - "loss": 3.6154, - "step": 14545 - }, - { - "epoch": 0.25, - "learning_rate": 0.000515258447780132, - "loss": 3.6433, - "step": 14550 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005152011396522851, - "loss": 3.5996, - "step": 14555 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005151438153424155, - "loss": 3.6255, - "step": 14560 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005150864748548335, - "loss": 3.5906, - "step": 14565 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005150291181938508, - "loss": 3.6043, - "step": 14570 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005149717453637805, - "loss": 3.4846, - "step": 14575 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005149143563689367, - "loss": 3.5817, - "step": 14580 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005148569512136346, - "loss": 3.5475, - "step": 14585 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005147995299021911, - "loss": 3.6521, - "step": 14590 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005147420924389236, - "loss": 3.6627, - "step": 14595 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005146846388281514, - "loss": 3.6745, - "step": 14600 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005146271690741945, - "loss": 3.6828, - "step": 14605 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005145696831813744, - "loss": 3.6828, - "step": 14610 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005145121811540139, - "loss": 3.5683, - "step": 14615 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005144546629964367, - "loss": 3.6632, - "step": 14620 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005143971287129679, - "loss": 3.6587, - "step": 14625 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005143395783079337, - "loss": 3.603, - "step": 14630 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005142820117856618, - "loss": 3.6555, - "step": 14635 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005142244291504808, - "loss": 3.7154, - "step": 14640 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005141668304067205, - "loss": 3.6161, - "step": 14645 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005141092155587123, - "loss": 3.6059, - "step": 14650 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005140515846107882, - "loss": 3.6758, - "step": 14655 - }, - { - "epoch": 0.25, - "learning_rate": 0.000513993937567282, - "loss": 3.6174, - "step": 14660 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005139362744325285, - "loss": 3.5476, - "step": 14665 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005138785952108635, - "loss": 3.6128, - "step": 14670 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005138208999066242, - "loss": 3.5876, - "step": 14675 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005137631885241491, - "loss": 3.5896, - "step": 14680 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005137054610677777, - "loss": 3.5601, - "step": 14685 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005136477175418508, - "loss": 3.6276, - "step": 14690 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005135899579507106, - "loss": 3.5822, - "step": 14695 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005135321822987001, - "loss": 3.4708, - "step": 14700 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005134743905901639, - "loss": 3.6286, - "step": 14705 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005134165828294475, - "loss": 3.5722, - "step": 14710 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005133587590208979, - "loss": 3.6132, - "step": 14715 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005133009191688629, - "loss": 3.7012, - "step": 14720 - }, - { - "epoch": 0.25, - "learning_rate": 0.000513243063277692, - "loss": 3.6159, - "step": 14725 - }, - { - "epoch": 0.25, - "learning_rate": 0.0005131851913517358, - "loss": 3.6005, - "step": 14730 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005131273033953456, - "loss": 3.592, - "step": 14735 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005130693994128744, - "loss": 3.6199, - "step": 14740 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005130114794086763, - "loss": 3.5501, - "step": 14745 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005129535433871067, - "loss": 3.6375, - "step": 14750 - }, - { - "epoch": 0.26, - "learning_rate": 0.000512895591352522, - "loss": 3.5636, - "step": 14755 - }, - { - "epoch": 0.26, - "learning_rate": 0.00051283762330928, - "loss": 3.6531, - "step": 14760 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005127796392617393, - "loss": 3.5518, - "step": 14765 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005127216392142604, - "loss": 3.5923, - "step": 14770 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005126636231712042, - "loss": 3.5783, - "step": 14775 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005126055911369335, - "loss": 3.69, - "step": 14780 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005125475431158121, - "loss": 3.7556, - "step": 14785 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005124894791122045, - "loss": 3.6912, - "step": 14790 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005124313991304773, - "loss": 3.6166, - "step": 14795 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005123733031749974, - "loss": 3.5407, - "step": 14800 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005123151912501336, - "loss": 3.6813, - "step": 14805 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005122570633602554, - "loss": 3.6641, - "step": 14810 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005121989195097339, - "loss": 3.6209, - "step": 14815 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005121407597029413, - "loss": 3.6123, - "step": 14820 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005120825839442506, - "loss": 3.5358, - "step": 14825 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005120243922380366, - "loss": 3.6142, - "step": 14830 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005119661845886747, - "loss": 3.5662, - "step": 14835 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005119079610005422, - "loss": 3.6078, - "step": 14840 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005118497214780171, - "loss": 3.6417, - "step": 14845 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005117914660254785, - "loss": 3.5625, - "step": 14850 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005117331946473071, - "loss": 3.619, - "step": 14855 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005116749073478846, - "loss": 3.616, - "step": 14860 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005116166041315939, - "loss": 3.5866, - "step": 14865 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005115582850028192, - "loss": 3.6377, - "step": 14870 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005114999499659454, - "loss": 3.6135, - "step": 14875 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005114415990253595, - "loss": 3.5777, - "step": 14880 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005113832321854491, - "loss": 3.5029, - "step": 14885 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005113248494506027, - "loss": 3.5864, - "step": 14890 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005112664508252107, - "loss": 3.5529, - "step": 14895 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005112080363136644, - "loss": 3.588, - "step": 14900 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005111496059203561, - "loss": 3.614, - "step": 14905 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005110911596496797, - "loss": 3.6759, - "step": 14910 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005110326975060298, - "loss": 3.4662, - "step": 14915 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005109742194938026, - "loss": 3.6857, - "step": 14920 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005109157256173954, - "loss": 3.6664, - "step": 14925 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005108572158812064, - "loss": 3.5142, - "step": 14930 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005107986902896355, - "loss": 3.5656, - "step": 14935 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005107401488470834, - "loss": 3.7301, - "step": 14940 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005106815915579522, - "loss": 3.5186, - "step": 14945 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005106230184266448, - "loss": 3.65, - "step": 14950 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005105644294575661, - "loss": 3.6077, - "step": 14955 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005105058246551214, - "loss": 3.6377, - "step": 14960 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005104472040237174, - "loss": 3.5337, - "step": 14965 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005103885675677622, - "loss": 3.5585, - "step": 14970 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005103299152916651, - "loss": 3.6477, - "step": 14975 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005102712471998361, - "loss": 3.5803, - "step": 14980 - }, - { - "epoch": 0.26, - "learning_rate": 0.000510212563296687, - "loss": 3.5321, - "step": 14985 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005101538635866304, - "loss": 3.6679, - "step": 14990 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005100951480740804, - "loss": 3.6513, - "step": 14995 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005100364167634518, - "loss": 3.5916, - "step": 15000 - }, - { - "epoch": 0.26, - "eval_loss": 3.638385534286499, - "eval_runtime": 149.9801, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 15000 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005099776696591613, - "loss": 3.6363, - "step": 15005 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005099189067656261, - "loss": 3.5853, - "step": 15010 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005098601280872649, - "loss": 3.6406, - "step": 15015 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005098013336284975, - "loss": 3.5502, - "step": 15020 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005097425233937451, - "loss": 3.6303, - "step": 15025 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005096836973874298, - "loss": 3.5962, - "step": 15030 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005096248556139753, - "loss": 3.4167, - "step": 15035 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005095659980778056, - "loss": 3.5678, - "step": 15040 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005095071247833473, - "loss": 3.5376, - "step": 15045 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005094482357350266, - "loss": 3.6403, - "step": 15050 - }, - { - "epoch": 0.26, - "learning_rate": 0.000509389330937272, - "loss": 3.6371, - "step": 15055 - }, - { - "epoch": 0.26, - "learning_rate": 0.000509330410394513, - "loss": 3.5036, - "step": 15060 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005092714741111797, - "loss": 3.6479, - "step": 15065 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005092125220917042, - "loss": 3.6069, - "step": 15070 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005091535543405192, - "loss": 3.6093, - "step": 15075 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005090945708620588, - "loss": 3.6439, - "step": 15080 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005090355716607582, - "loss": 3.5337, - "step": 15085 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005089765567410539, - "loss": 3.604, - "step": 15090 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005089175261073837, - "loss": 3.5584, - "step": 15095 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005088584797641861, - "loss": 3.5309, - "step": 15100 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005087994177159012, - "loss": 3.6083, - "step": 15105 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005087403399669702, - "loss": 3.5913, - "step": 15110 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005086812465218354, - "loss": 3.5442, - "step": 15115 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005086221373849403, - "loss": 3.6318, - "step": 15120 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005085630125607297, - "loss": 3.6189, - "step": 15125 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005085038720536493, - "loss": 3.5056, - "step": 15130 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005084447158681465, - "loss": 3.5779, - "step": 15135 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005083855440086691, - "loss": 3.5665, - "step": 15140 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005083263564796669, - "loss": 3.5793, - "step": 15145 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005082671532855903, - "loss": 3.5825, - "step": 15150 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005082079344308912, - "loss": 3.7259, - "step": 15155 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005081486999200224, - "loss": 3.6756, - "step": 15160 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005080894497574381, - "loss": 3.5817, - "step": 15165 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005080301839475936, - "loss": 3.518, - "step": 15170 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005079709024949455, - "loss": 3.5779, - "step": 15175 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005079116054039513, - "loss": 3.7219, - "step": 15180 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005078522926790698, - "loss": 3.5903, - "step": 15185 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005077929643247613, - "loss": 3.5368, - "step": 15190 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005077336203454867, - "loss": 3.6035, - "step": 15195 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005076742607457086, - "loss": 3.6253, - "step": 15200 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005076148855298903, - "loss": 3.6759, - "step": 15205 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005075554947024967, - "loss": 3.6473, - "step": 15210 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005074960882679935, - "loss": 3.5805, - "step": 15215 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005074366662308478, - "loss": 3.5368, - "step": 15220 - }, - { - "epoch": 0.26, - "learning_rate": 0.000507377228595528, - "loss": 3.5171, - "step": 15225 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005073177753665034, - "loss": 3.6021, - "step": 15230 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005072583065482446, - "loss": 3.6194, - "step": 15235 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005071988221452233, - "loss": 3.6315, - "step": 15240 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005071393221619124, - "loss": 3.6864, - "step": 15245 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005070798066027862, - "loss": 3.5701, - "step": 15250 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005070202754723196, - "loss": 3.6381, - "step": 15255 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005069607287749893, - "loss": 3.572, - "step": 15260 - }, - { - "epoch": 0.26, - "learning_rate": 0.000506901166515273, - "loss": 3.6789, - "step": 15265 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005068415886976493, - "loss": 3.536, - "step": 15270 - }, - { - "epoch": 0.26, - "learning_rate": 0.000506781995326598, - "loss": 3.5427, - "step": 15275 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005067223864066005, - "loss": 3.5554, - "step": 15280 - }, - { - "epoch": 0.26, - "learning_rate": 0.000506662761942139, - "loss": 3.5994, - "step": 15285 - }, - { - "epoch": 0.26, - "learning_rate": 0.000506603121937697, - "loss": 3.6482, - "step": 15290 - }, - { - "epoch": 0.26, - "learning_rate": 0.000506543466397759, - "loss": 3.6878, - "step": 15295 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005064837953268108, - "loss": 3.489, - "step": 15300 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005064241087293393, - "loss": 3.4993, - "step": 15305 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005063644066098329, - "loss": 3.5804, - "step": 15310 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005063046889727807, - "loss": 3.5341, - "step": 15315 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005062449558226732, - "loss": 3.5108, - "step": 15320 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005061852071640019, - "loss": 3.6813, - "step": 15325 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005061254430012598, - "loss": 3.4356, - "step": 15330 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005060656633389406, - "loss": 3.6614, - "step": 15335 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005060058681815399, - "loss": 3.6952, - "step": 15340 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005059460575335534, - "loss": 3.6666, - "step": 15345 - }, - { - "epoch": 0.27, - "learning_rate": 0.000505886231399479, - "loss": 3.5999, - "step": 15350 - }, - { - "epoch": 0.27, - "learning_rate": 0.000505826389783815, - "loss": 3.6896, - "step": 15355 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005057665326910614, - "loss": 3.5638, - "step": 15360 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005057066601257191, - "loss": 3.5589, - "step": 15365 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005056467720922903, - "loss": 3.6379, - "step": 15370 - }, - { - "epoch": 0.27, - "learning_rate": 0.000505586868595278, - "loss": 3.5751, - "step": 15375 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005055269496391868, - "loss": 3.5328, - "step": 15380 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005054670152285223, - "loss": 3.5601, - "step": 15385 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005054070653677913, - "loss": 3.5779, - "step": 15390 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005053471000615017, - "loss": 3.637, - "step": 15395 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005052871193141625, - "loss": 3.5866, - "step": 15400 - }, - { - "epoch": 0.27, - "learning_rate": 0.000505227123130284, - "loss": 3.6035, - "step": 15405 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005051671115143776, - "loss": 3.6226, - "step": 15410 - }, - { - "epoch": 0.27, - "learning_rate": 0.000505107084470956, - "loss": 3.5384, - "step": 15415 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005050470420045327, - "loss": 3.512, - "step": 15420 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005049869841196228, - "loss": 3.6033, - "step": 15425 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005049269108207421, - "loss": 3.5151, - "step": 15430 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005048668221124081, - "loss": 3.4719, - "step": 15435 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005048067179991389, - "loss": 3.6271, - "step": 15440 - }, - { - "epoch": 0.27, - "learning_rate": 0.000504746598485454, - "loss": 3.7077, - "step": 15445 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005046864635758744, - "loss": 3.6447, - "step": 15450 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005046263132749217, - "loss": 3.601, - "step": 15455 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005045661475871189, - "loss": 3.6165, - "step": 15460 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005045059665169902, - "loss": 3.6336, - "step": 15465 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005044457700690609, - "loss": 3.4781, - "step": 15470 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005043855582478574, - "loss": 3.5317, - "step": 15475 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005043253310579075, - "loss": 3.5082, - "step": 15480 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005042650885037397, - "loss": 3.6004, - "step": 15485 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005042048305898843, - "loss": 3.6375, - "step": 15490 - }, - { - "epoch": 0.27, - "learning_rate": 0.000504144557320872, - "loss": 3.4989, - "step": 15495 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005040842687012351, - "loss": 3.6445, - "step": 15500 - }, - { - "epoch": 0.27, - "eval_loss": 3.6320202350616455, - "eval_runtime": 150.4615, - "eval_samples_per_second": 12.236, - "eval_steps_per_second": 0.771, - "step": 15500 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005040239647355073, - "loss": 3.521, - "step": 15505 - }, - { - "epoch": 0.27, - "learning_rate": 0.000503963645428223, - "loss": 3.5892, - "step": 15510 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005039033107839177, - "loss": 3.5733, - "step": 15515 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005038429608071285, - "loss": 3.6047, - "step": 15520 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005037825955023933, - "loss": 3.6021, - "step": 15525 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005037222148742514, - "loss": 3.6138, - "step": 15530 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005036618189272428, - "loss": 3.6236, - "step": 15535 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005036014076659094, - "loss": 3.5677, - "step": 15540 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005035409810947934, - "loss": 3.498, - "step": 15545 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005034805392184389, - "loss": 3.5847, - "step": 15550 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005034200820413906, - "loss": 3.6563, - "step": 15555 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005033596095681946, - "loss": 3.6285, - "step": 15560 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005032991218033981, - "loss": 3.524, - "step": 15565 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005032386187515497, - "loss": 3.5815, - "step": 15570 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005031781004171986, - "loss": 3.5129, - "step": 15575 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005031175668048957, - "loss": 3.5887, - "step": 15580 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005030570179191927, - "loss": 3.5245, - "step": 15585 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005029964537646427, - "loss": 3.6023, - "step": 15590 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005029358743457997, - "loss": 3.5239, - "step": 15595 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005028752796672189, - "loss": 3.5323, - "step": 15600 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005028146697334568, - "loss": 3.5991, - "step": 15605 - }, - { - "epoch": 0.27, - "learning_rate": 0.000502754044549071, - "loss": 3.4643, - "step": 15610 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005026934041186201, - "loss": 3.5993, - "step": 15615 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005026327484466641, - "loss": 3.5821, - "step": 15620 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005025720775377637, - "loss": 3.498, - "step": 15625 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005025113913964813, - "loss": 3.5981, - "step": 15630 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005024506900273803, - "loss": 3.5101, - "step": 15635 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005023899734350248, - "loss": 3.5864, - "step": 15640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005023292416239805, - "loss": 3.5627, - "step": 15645 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005022684945988141, - "loss": 3.5497, - "step": 15650 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005022077323640937, - "loss": 3.6516, - "step": 15655 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005021469549243881, - "loss": 3.4684, - "step": 15660 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005020861622842673, - "loss": 3.5867, - "step": 15665 - }, - { - "epoch": 0.27, - "learning_rate": 0.000502025354448303, - "loss": 3.5082, - "step": 15670 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005019645314210672, - "loss": 3.6077, - "step": 15675 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005019036932071338, - "loss": 3.5891, - "step": 15680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005018428398110774, - "loss": 3.6244, - "step": 15685 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005017819712374739, - "loss": 3.4976, - "step": 15690 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005017210874909003, - "loss": 3.6083, - "step": 15695 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005016601885759346, - "loss": 3.5849, - "step": 15700 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005015992744971562, - "loss": 3.6031, - "step": 15705 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005015383452591456, - "loss": 3.5034, - "step": 15710 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005014774008664844, - "loss": 3.5678, - "step": 15715 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005014164413237552, - "loss": 3.6184, - "step": 15720 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005013554666355417, - "loss": 3.6015, - "step": 15725 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005012944768064291, - "loss": 3.6218, - "step": 15730 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005012334718410036, - "loss": 3.8092, - "step": 15735 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005011724517438521, - "loss": 4.6015, - "step": 15740 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005011114165195634, - "loss": 4.2182, - "step": 15745 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005010503661727268, - "loss": 3.7612, - "step": 15750 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005009893007079329, - "loss": 3.7264, - "step": 15755 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005009282201297739, - "loss": 3.6667, - "step": 15760 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005008671244428423, - "loss": 3.6342, - "step": 15765 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005008060136517324, - "loss": 3.6282, - "step": 15770 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005007448877610395, - "loss": 3.6548, - "step": 15775 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005006837467753597, - "loss": 3.6451, - "step": 15780 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005006225906992906, - "loss": 3.7284, - "step": 15785 - }, - { - "epoch": 0.27, - "learning_rate": 0.000500561419537431, - "loss": 3.6631, - "step": 15790 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005005002332943804, - "loss": 3.6146, - "step": 15795 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005004390319747399, - "loss": 3.6857, - "step": 15800 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005003778155831113, - "loss": 3.5865, - "step": 15805 - }, - { - "epoch": 0.27, - "learning_rate": 0.000500316584124098, - "loss": 3.7437, - "step": 15810 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005002553376023042, - "loss": 3.5996, - "step": 15815 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005001940760223352, - "loss": 3.7551, - "step": 15820 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005001327993887977, - "loss": 3.6479, - "step": 15825 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005000715077062994, - "loss": 3.6632, - "step": 15830 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005000102009794491, - "loss": 3.5554, - "step": 15835 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004999488792128567, - "loss": 3.6261, - "step": 15840 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004998875424111332, - "loss": 3.5204, - "step": 15845 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004998261905788909, - "loss": 3.6914, - "step": 15850 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004997648237207433, - "loss": 3.4878, - "step": 15855 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004997034418413046, - "loss": 3.5916, - "step": 15860 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004996420449451907, - "loss": 3.6535, - "step": 15865 - }, - { - "epoch": 0.27, - "learning_rate": 0.000499580633037018, - "loss": 3.6224, - "step": 15870 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004995192061214047, - "loss": 3.5223, - "step": 15875 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004994577642029695, - "loss": 3.563, - "step": 15880 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004993963072863326, - "loss": 3.561, - "step": 15885 - }, - { - "epoch": 0.27, - "learning_rate": 0.0004993348353761154, - "loss": 3.6209, - "step": 15890 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004992733484769401, - "loss": 3.3646, - "step": 15895 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004992118465934302, - "loss": 3.5317, - "step": 15900 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004991503297302105, - "loss": 3.5534, - "step": 15905 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004990887978919066, - "loss": 3.6341, - "step": 15910 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004990272510831454, - "loss": 3.5312, - "step": 15915 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004989656893085549, - "loss": 3.5533, - "step": 15920 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004989041125727643, - "loss": 3.5574, - "step": 15925 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004988425208804037, - "loss": 3.5899, - "step": 15930 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004987809142361046, - "loss": 3.5567, - "step": 15935 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004987192926444995, - "loss": 3.5264, - "step": 15940 - }, - { - "epoch": 0.28, - "learning_rate": 0.000498657656110222, - "loss": 3.6206, - "step": 15945 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004985960046379069, - "loss": 3.5102, - "step": 15950 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004985343382321901, - "loss": 3.5516, - "step": 15955 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004984726568977083, - "loss": 3.6478, - "step": 15960 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004984109606391002, - "loss": 3.6414, - "step": 15965 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004983492494610045, - "loss": 3.5679, - "step": 15970 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004982875233680618, - "loss": 3.5373, - "step": 15975 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004982257823649136, - "loss": 3.6789, - "step": 15980 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004981640264562023, - "loss": 3.642, - "step": 15985 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004981022556465719, - "loss": 3.5777, - "step": 15990 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004980404699406672, - "loss": 3.6623, - "step": 15995 - }, - { - "epoch": 0.28, - "learning_rate": 0.000497978669343134, - "loss": 3.6401, - "step": 16000 - }, - { - "epoch": 0.28, - "eval_loss": 3.6124560832977295, - "eval_runtime": 150.0796, - "eval_samples_per_second": 12.267, - "eval_steps_per_second": 0.773, - "step": 16000 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004979168538586195, - "loss": 3.5841, - "step": 16005 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004978550234917719, - "loss": 3.595, - "step": 16010 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004977931782472405, - "loss": 3.5558, - "step": 16015 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004977313181296759, - "loss": 3.592, - "step": 16020 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004976694431437295, - "loss": 3.5335, - "step": 16025 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004976075532940539, - "loss": 3.4794, - "step": 16030 - }, - { - "epoch": 0.28, - "learning_rate": 0.000497545648585303, - "loss": 3.6644, - "step": 16035 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004974837290221318, - "loss": 3.5437, - "step": 16040 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004974217946091962, - "loss": 3.6994, - "step": 16045 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004973598453511535, - "loss": 3.6072, - "step": 16050 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004972978812526619, - "loss": 3.5775, - "step": 16055 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004972359023183808, - "loss": 3.5683, - "step": 16060 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004971739085529705, - "loss": 3.4989, - "step": 16065 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004971118999610929, - "loss": 3.5584, - "step": 16070 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004970498765474106, - "loss": 3.5787, - "step": 16075 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004969878383165873, - "loss": 3.6137, - "step": 16080 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004969257852732883, - "loss": 3.7059, - "step": 16085 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004968637174221794, - "loss": 3.6148, - "step": 16090 - }, - { - "epoch": 0.28, - "learning_rate": 0.000496801634767928, - "loss": 3.5649, - "step": 16095 - }, - { - "epoch": 0.28, - "learning_rate": 0.000496739537315202, - "loss": 3.6258, - "step": 16100 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004966774250686713, - "loss": 3.5316, - "step": 16105 - }, - { - "epoch": 0.28, - "learning_rate": 0.000496615298033006, - "loss": 3.5973, - "step": 16110 - }, - { - "epoch": 0.28, - "learning_rate": 0.000496553156212878, - "loss": 3.6305, - "step": 16115 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004964909996129599, - "loss": 3.5401, - "step": 16120 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004964288282379258, - "loss": 3.5479, - "step": 16125 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004963666420924504, - "loss": 3.5814, - "step": 16130 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004963044411812099, - "loss": 3.4288, - "step": 16135 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004962422255088815, - "loss": 3.5357, - "step": 16140 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004961799950801434, - "loss": 3.5193, - "step": 16145 - }, - { - "epoch": 0.28, - "learning_rate": 0.000496117749899675, - "loss": 3.5514, - "step": 16150 - }, - { - "epoch": 0.28, - "learning_rate": 0.000496055489972157, - "loss": 3.5557, - "step": 16155 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004959932153022709, - "loss": 3.5929, - "step": 16160 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004959309258946996, - "loss": 3.542, - "step": 16165 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004958686217541266, - "loss": 3.5837, - "step": 16170 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004958063028852372, - "loss": 3.4923, - "step": 16175 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004957439692927173, - "loss": 3.6609, - "step": 16180 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004956816209812539, - "loss": 3.4934, - "step": 16185 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004956192579555356, - "loss": 3.5329, - "step": 16190 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004955568802202517, - "loss": 3.5227, - "step": 16195 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004954944877800924, - "loss": 3.5705, - "step": 16200 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004954320806397497, - "loss": 3.5531, - "step": 16205 - }, - { - "epoch": 0.28, - "learning_rate": 0.000495369658803916, - "loss": 3.4774, - "step": 16210 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004953072222772853, - "loss": 3.5052, - "step": 16215 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004952447710645524, - "loss": 3.5444, - "step": 16220 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004951823051704134, - "loss": 3.5683, - "step": 16225 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004951198245995652, - "loss": 3.4055, - "step": 16230 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004950573293567062, - "loss": 3.6428, - "step": 16235 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004949948194465357, - "loss": 3.6984, - "step": 16240 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004949322948737541, - "loss": 3.5885, - "step": 16245 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004948697556430631, - "loss": 3.4286, - "step": 16250 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004948072017591649, - "loss": 3.565, - "step": 16255 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004947446332267636, - "loss": 3.5906, - "step": 16260 - }, - { - "epoch": 0.28, - "learning_rate": 0.000494682050050564, - "loss": 3.634, - "step": 16265 - }, - { - "epoch": 0.28, - "learning_rate": 0.000494619452235272, - "loss": 3.5922, - "step": 16270 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004945568397855945, - "loss": 3.538, - "step": 16275 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004944942127062397, - "loss": 3.572, - "step": 16280 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004944315710019169, - "loss": 3.6377, - "step": 16285 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004943689146773365, - "loss": 3.6228, - "step": 16290 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004943062437372098, - "loss": 3.5855, - "step": 16295 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004942435581862493, - "loss": 3.5372, - "step": 16300 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004941808580291688, - "loss": 3.5609, - "step": 16305 - }, - { - "epoch": 0.28, - "learning_rate": 0.000494118143270683, - "loss": 3.6014, - "step": 16310 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004940554139155074, - "loss": 3.5558, - "step": 16315 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004939926699683594, - "loss": 3.5162, - "step": 16320 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004939299114339568, - "loss": 3.6057, - "step": 16325 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004938671383170187, - "loss": 3.6958, - "step": 16330 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004938043506222653, - "loss": 3.6499, - "step": 16335 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004937415483544181, - "loss": 3.5561, - "step": 16340 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004936787315181993, - "loss": 3.5672, - "step": 16345 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004936159001183325, - "loss": 3.5341, - "step": 16350 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004935530541595422, - "loss": 3.5887, - "step": 16355 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004934901936465543, - "loss": 3.5734, - "step": 16360 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004934273185840953, - "loss": 3.5032, - "step": 16365 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004933644289768933, - "loss": 3.6115, - "step": 16370 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004933015248296772, - "loss": 3.5705, - "step": 16375 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004932386061471772, - "loss": 3.6443, - "step": 16380 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004931756729341242, - "loss": 3.4791, - "step": 16385 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004931127251952507, - "loss": 3.6197, - "step": 16390 - }, - { - "epoch": 0.28, - "learning_rate": 0.00049304976293529, - "loss": 3.4558, - "step": 16395 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004929867861589764, - "loss": 3.5077, - "step": 16400 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004929237948710455, - "loss": 3.5552, - "step": 16405 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004928607890762341, - "loss": 3.5842, - "step": 16410 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004927977687792796, - "loss": 3.5561, - "step": 16415 - }, - { - "epoch": 0.28, - "learning_rate": 0.000492734733984921, - "loss": 3.4731, - "step": 16420 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004926716846978982, - "loss": 3.5924, - "step": 16425 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004926086209229521, - "loss": 3.4558, - "step": 16430 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004925455426648248, - "loss": 3.457, - "step": 16435 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004924824499282595, - "loss": 3.5853, - "step": 16440 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004924193427180004, - "loss": 3.6165, - "step": 16445 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004923562210387928, - "loss": 3.4844, - "step": 16450 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004922930848953834, - "loss": 3.629, - "step": 16455 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004922299342925193, - "loss": 3.4659, - "step": 16460 - }, - { - "epoch": 0.28, - "learning_rate": 0.0004921667692349493, - "loss": 3.6049, - "step": 16465 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004921035897274232, - "loss": 3.5527, - "step": 16470 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004920403957746916, - "loss": 3.5113, - "step": 16475 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004919771873815065, - "loss": 3.5445, - "step": 16480 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004919139645526207, - "loss": 3.5608, - "step": 16485 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004918507272927883, - "loss": 3.5982, - "step": 16490 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004917874756067644, - "loss": 3.5369, - "step": 16495 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004917242094993054, - "loss": 3.4741, - "step": 16500 - }, - { - "epoch": 0.29, - "eval_loss": 3.5877273082733154, - "eval_runtime": 150.0518, - "eval_samples_per_second": 12.269, - "eval_steps_per_second": 0.773, - "step": 16500 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004916609289751683, - "loss": 3.5372, - "step": 16505 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004915976340391116, - "loss": 3.5657, - "step": 16510 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004915343246958947, - "loss": 3.5311, - "step": 16515 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004914710009502782, - "loss": 3.5391, - "step": 16520 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004914076628070238, - "loss": 3.6073, - "step": 16525 - }, - { - "epoch": 0.29, - "learning_rate": 0.000491344310270894, - "loss": 3.4663, - "step": 16530 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004912809433466527, - "loss": 3.5429, - "step": 16535 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004912175620390648, - "loss": 3.5616, - "step": 16540 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004911541663528961, - "loss": 3.5202, - "step": 16545 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004910907562929139, - "loss": 3.6226, - "step": 16550 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004910273318638861, - "loss": 3.5045, - "step": 16555 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004909638930705819, - "loss": 3.4955, - "step": 16560 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004909004399177717, - "loss": 3.5726, - "step": 16565 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004908369724102267, - "loss": 3.4443, - "step": 16570 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004907734905527193, - "loss": 3.5395, - "step": 16575 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004907099943500233, - "loss": 3.4818, - "step": 16580 - }, - { - "epoch": 0.29, - "learning_rate": 0.000490646483806913, - "loss": 3.6123, - "step": 16585 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004905829589281642, - "loss": 3.5348, - "step": 16590 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004905194197185536, - "loss": 3.5266, - "step": 16595 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004904558661828589, - "loss": 3.5143, - "step": 16600 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004903922983258593, - "loss": 3.6398, - "step": 16605 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004903287161523346, - "loss": 3.5811, - "step": 16610 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004902651196670658, - "loss": 3.4796, - "step": 16615 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004902015088748352, - "loss": 3.4956, - "step": 16620 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004901378837804258, - "loss": 3.4245, - "step": 16625 - }, - { - "epoch": 0.29, - "learning_rate": 0.000490074244388622, - "loss": 3.6016, - "step": 16630 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004900105907042092, - "loss": 3.5701, - "step": 16635 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004899469227319737, - "loss": 3.5832, - "step": 16640 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004898832404767031, - "loss": 3.5255, - "step": 16645 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004898195439431858, - "loss": 3.5867, - "step": 16650 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004897558331362119, - "loss": 3.4916, - "step": 16655 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004896921080605716, - "loss": 3.5749, - "step": 16660 - }, - { - "epoch": 0.29, - "learning_rate": 0.000489628368721057, - "loss": 3.5705, - "step": 16665 - }, - { - "epoch": 0.29, - "learning_rate": 0.000489564615122461, - "loss": 3.5197, - "step": 16670 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004895008472695772, - "loss": 3.669, - "step": 16675 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004894370651672011, - "loss": 3.5425, - "step": 16680 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004893732688201284, - "loss": 3.5529, - "step": 16685 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004893094582331566, - "loss": 3.5687, - "step": 16690 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004892456334110835, - "loss": 3.5815, - "step": 16695 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004891817943587087, - "loss": 3.5539, - "step": 16700 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004891179410808326, - "loss": 3.6557, - "step": 16705 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004890540735822566, - "loss": 3.6539, - "step": 16710 - }, - { - "epoch": 0.29, - "learning_rate": 0.000488990191867783, - "loss": 3.675, - "step": 16715 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004889262959422155, - "loss": 3.5966, - "step": 16720 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004888623858103589, - "loss": 3.4819, - "step": 16725 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004887984614770188, - "loss": 3.4654, - "step": 16730 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004887345229470019, - "loss": 3.5918, - "step": 16735 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004886705702251161, - "loss": 3.5793, - "step": 16740 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004886066033161704, - "loss": 3.5092, - "step": 16745 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004885426222249747, - "loss": 3.6574, - "step": 16750 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004884786269563401, - "loss": 3.5397, - "step": 16755 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004884146175150786, - "loss": 3.4925, - "step": 16760 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004883505939060036, - "loss": 3.5832, - "step": 16765 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004882865561339293, - "loss": 3.5626, - "step": 16770 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048822250420367064, - "loss": 3.5953, - "step": 16775 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048815843812004446, - "loss": 3.6348, - "step": 16780 - }, - { - "epoch": 0.29, - "learning_rate": 0.000488094357887868, - "loss": 3.5284, - "step": 16785 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048803026351195984, - "loss": 3.5496, - "step": 16790 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048796615499713947, - "loss": 3.554, - "step": 16795 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004879020323482276, - "loss": 3.5148, - "step": 16800 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004878378955700458, - "loss": 3.5522, - "step": 16805 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048777374466741697, - "loss": 3.5446, - "step": 16810 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004877095796451648, - "loss": 3.5773, - "step": 16815 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004876454005081144, - "loss": 3.6213, - "step": 16820 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004875812072610913, - "loss": 3.621, - "step": 16825 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004875169999089229, - "loss": 3.524, - "step": 16830 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004874527784564371, - "loss": 3.5176, - "step": 16835 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004873885429084631, - "loss": 3.4054, - "step": 16840 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048732429326983097, - "loss": 3.5634, - "step": 16845 - }, - { - "epoch": 0.29, - "learning_rate": 0.000487260029545372, - "loss": 3.6623, - "step": 16850 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004871957517399184, - "loss": 3.5978, - "step": 16855 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048713145985830386, - "loss": 3.5235, - "step": 16860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048706715390536235, - "loss": 3.5122, - "step": 16865 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048700283388592976, - "loss": 3.5124, - "step": 16870 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048693849980484234, - "loss": 3.5231, - "step": 16875 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048687415166693773, - "loss": 3.6416, - "step": 16880 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004868097894770547, - "loss": 3.5731, - "step": 16885 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048674541324003287, - "loss": 3.5577, - "step": 16890 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048668102296071296, - "loss": 3.3237, - "step": 16895 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048661661864393696, - "loss": 3.5944, - "step": 16900 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048655220029454756, - "loss": 3.5177, - "step": 16905 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004864877679173889, - "loss": 3.511, - "step": 16910 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004864233215173056, - "loss": 3.5407, - "step": 16915 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004863588610991441, - "loss": 3.5129, - "step": 16920 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004862943866677512, - "loss": 3.5516, - "step": 16925 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004862298982279752, - "loss": 3.6088, - "step": 16930 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004861653957846653, - "loss": 3.4369, - "step": 16935 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004861008793426717, - "loss": 3.4892, - "step": 16940 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004860363489068457, - "loss": 3.5787, - "step": 16945 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048597180448203977, - "loss": 3.5381, - "step": 16950 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048590724607310714, - "loss": 3.4371, - "step": 16955 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004858426736849024, - "loss": 3.4722, - "step": 16960 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048577808732228105, - "loss": 3.5033, - "step": 16965 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048571348699009964, - "loss": 3.5394, - "step": 16970 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004856488726932158, - "loss": 3.5476, - "step": 16975 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048558424443648804, - "loss": 3.5903, - "step": 16980 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048551960222477635, - "loss": 3.4827, - "step": 16985 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004854549460629412, - "loss": 3.4285, - "step": 16990 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048539027595584464, - "loss": 3.5585, - "step": 16995 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004853255919083494, - "loss": 3.4662, - "step": 17000 - }, - { - "epoch": 0.29, - "eval_loss": 3.576127052307129, - "eval_runtime": 150.0801, - "eval_samples_per_second": 12.267, - "eval_steps_per_second": 0.773, - "step": 17000 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048526089392531945, - "loss": 3.4641, - "step": 17005 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004851961820116197, - "loss": 3.5871, - "step": 17010 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004851314561721162, - "loss": 3.6005, - "step": 17015 - }, - { - "epoch": 0.29, - "learning_rate": 0.000485066716411676, - "loss": 3.5406, - "step": 17020 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004850019627351672, - "loss": 3.5104, - "step": 17025 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004849371951474588, - "loss": 3.5767, - "step": 17030 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048487241365342116, - "loss": 3.5849, - "step": 17035 - }, - { - "epoch": 0.29, - "learning_rate": 0.00048480761825792556, - "loss": 3.5134, - "step": 17040 - }, - { - "epoch": 0.29, - "learning_rate": 0.0004847428089658442, - "loss": 3.5021, - "step": 17045 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004846779857820503, - "loss": 3.5575, - "step": 17050 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048461314871141835, - "loss": 3.5833, - "step": 17055 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048454829775882374, - "loss": 3.4563, - "step": 17060 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048448343292914296, - "loss": 3.5297, - "step": 17065 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004844185542272534, - "loss": 3.576, - "step": 17070 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048435366165803384, - "loss": 3.5006, - "step": 17075 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004842887552263637, - "loss": 3.579, - "step": 17080 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004842238349371235, - "loss": 3.565, - "step": 17085 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048415890079519516, - "loss": 3.4344, - "step": 17090 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004840939528054612, - "loss": 3.4845, - "step": 17095 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048402899097280556, - "loss": 3.5118, - "step": 17100 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004839640153021129, - "loss": 3.4574, - "step": 17105 - }, - { - "epoch": 0.3, - "learning_rate": 0.000483899025798269, - "loss": 3.5173, - "step": 17110 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048383402246616087, - "loss": 3.5195, - "step": 17115 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004837690053106765, - "loss": 3.4535, - "step": 17120 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004837039743367045, - "loss": 3.4877, - "step": 17125 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004836389295491353, - "loss": 3.5545, - "step": 17130 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048357387095285964, - "loss": 3.4785, - "step": 17135 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004835087985527697, - "loss": 3.5197, - "step": 17140 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004834437123537586, - "loss": 3.5171, - "step": 17145 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004833786123607205, - "loss": 3.5258, - "step": 17150 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004833134985785505, - "loss": 3.6473, - "step": 17155 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004832483710121448, - "loss": 3.5945, - "step": 17160 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004831832296664009, - "loss": 3.577, - "step": 17165 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048311807454621685, - "loss": 3.5094, - "step": 17170 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048305290565649195, - "loss": 3.489, - "step": 17175 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004829877230021268, - "loss": 3.524, - "step": 17180 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048292252658802264, - "loss": 3.5145, - "step": 17185 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048285731641908196, - "loss": 3.5293, - "step": 17190 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004827920925002082, - "loss": 3.4996, - "step": 17195 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048272685483630585, - "loss": 3.6237, - "step": 17200 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048266160343228066, - "loss": 3.4632, - "step": 17205 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004825963382930388, - "loss": 3.5058, - "step": 17210 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004825310594234882, - "loss": 3.521, - "step": 17215 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048246576682853736, - "loss": 3.4941, - "step": 17220 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048240046051309593, - "loss": 3.6331, - "step": 17225 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004823351404820747, - "loss": 3.5374, - "step": 17230 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048226980674038534, - "loss": 3.5544, - "step": 17235 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048220445929294067, - "loss": 3.4389, - "step": 17240 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048213909814465443, - "loss": 3.5835, - "step": 17245 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048207372330044137, - "loss": 3.4002, - "step": 17250 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004820083347652176, - "loss": 3.5722, - "step": 17255 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004819429325438997, - "loss": 3.6024, - "step": 17260 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004818775166414057, - "loss": 3.6439, - "step": 17265 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004818120870626547, - "loss": 3.4983, - "step": 17270 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048174664381256643, - "loss": 3.4879, - "step": 17275 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048168118689606197, - "loss": 3.5338, - "step": 17280 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048161571631806337, - "loss": 3.4761, - "step": 17285 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004815502320834936, - "loss": 3.5412, - "step": 17290 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048148473419727694, - "loss": 3.5941, - "step": 17295 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004814192226643383, - "loss": 3.5358, - "step": 17300 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048135369748960395, - "loss": 3.459, - "step": 17305 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004812881586780008, - "loss": 3.4669, - "step": 17310 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004812226062344573, - "loss": 3.5167, - "step": 17315 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048115704016390265, - "loss": 3.6433, - "step": 17320 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004810914604712669, - "loss": 3.4723, - "step": 17325 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048102586716148145, - "loss": 3.6129, - "step": 17330 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048096026023947846, - "loss": 3.5888, - "step": 17335 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048089463971019133, - "loss": 3.5604, - "step": 17340 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004808290055785544, - "loss": 3.5614, - "step": 17345 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004807633578495029, - "loss": 3.5267, - "step": 17350 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004806976965279734, - "loss": 3.5437, - "step": 17355 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004806320216189031, - "loss": 3.577, - "step": 17360 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004805663331272306, - "loss": 3.5452, - "step": 17365 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048050063105789515, - "loss": 3.5119, - "step": 17370 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004804349154158373, - "loss": 3.5662, - "step": 17375 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004803691862059985, - "loss": 3.5817, - "step": 17380 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004803034434333214, - "loss": 3.4176, - "step": 17385 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004802376871027493, - "loss": 3.5071, - "step": 17390 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048017191721922683, - "loss": 3.4767, - "step": 17395 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048010613378769945, - "loss": 3.4762, - "step": 17400 - }, - { - "epoch": 0.3, - "learning_rate": 0.00048004033681311407, - "loss": 3.5571, - "step": 17405 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047997452630041783, - "loss": 3.4705, - "step": 17410 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047990870225455976, - "loss": 3.4789, - "step": 17415 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047984286468048923, - "loss": 3.5571, - "step": 17420 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047977701358315687, - "loss": 3.5161, - "step": 17425 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047971114896751455, - "loss": 3.6078, - "step": 17430 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047964527083851476, - "loss": 3.4624, - "step": 17435 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047957937920111126, - "loss": 3.4843, - "step": 17440 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047951347406025886, - "loss": 3.5239, - "step": 17445 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047944755542091307, - "loss": 3.5355, - "step": 17450 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047938162328803086, - "loss": 3.427, - "step": 17455 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004793156776665699, - "loss": 3.5715, - "step": 17460 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047924971856148896, - "loss": 3.6401, - "step": 17465 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004791837459777478, - "loss": 3.5878, - "step": 17470 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004791177599203072, - "loss": 3.4139, - "step": 17475 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047905176039412904, - "loss": 3.5894, - "step": 17480 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004789857474041762, - "loss": 3.5054, - "step": 17485 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004789197209554124, - "loss": 3.4073, - "step": 17490 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004788536810528025, - "loss": 3.546, - "step": 17495 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004787876277013124, - "loss": 3.4948, - "step": 17500 - }, - { - "epoch": 0.3, - "eval_loss": 3.5631024837493896, - "eval_runtime": 149.9514, - "eval_samples_per_second": 12.277, - "eval_steps_per_second": 0.774, - "step": 17500 - }, - { - "epoch": 0.3, - "learning_rate": 0.000478721560905909, - "loss": 3.5123, - "step": 17505 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004786554806715601, - "loss": 3.4436, - "step": 17510 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004785893870032347, - "loss": 3.551, - "step": 17515 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004785232799059026, - "loss": 3.5769, - "step": 17520 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004784571593845349, - "loss": 3.5486, - "step": 17525 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047839102544410325, - "loss": 3.6552, - "step": 17530 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047832487808958083, - "loss": 3.5692, - "step": 17535 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004782587173259414, - "loss": 3.4071, - "step": 17540 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047819254315816006, - "loss": 3.5011, - "step": 17545 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047812635559121256, - "loss": 3.5682, - "step": 17550 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047806015463007615, - "loss": 3.4182, - "step": 17555 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047799394027972866, - "loss": 3.5072, - "step": 17560 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004779277125451489, - "loss": 3.4896, - "step": 17565 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047786147143131716, - "loss": 3.3961, - "step": 17570 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047779521694321425, - "loss": 3.5795, - "step": 17575 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004777289490858222, - "loss": 3.4299, - "step": 17580 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047766266786412415, - "loss": 3.5548, - "step": 17585 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047759637328310386, - "loss": 3.4617, - "step": 17590 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047753006534774646, - "loss": 3.4899, - "step": 17595 - }, - { - "epoch": 0.3, - "learning_rate": 0.000477463744063038, - "loss": 3.5963, - "step": 17600 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047739740943396553, - "loss": 3.4351, - "step": 17605 - }, - { - "epoch": 0.3, - "learning_rate": 0.0004773310614655169, - "loss": 3.5722, - "step": 17610 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047726470016268134, - "loss": 3.5019, - "step": 17615 - }, - { - "epoch": 0.3, - "learning_rate": 0.00047719832553044876, - "loss": 3.4795, - "step": 17620 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004771319375738103, - "loss": 3.4939, - "step": 17625 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004770655362977578, - "loss": 3.459, - "step": 17630 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004769991217072844, - "loss": 3.4321, - "step": 17635 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004769326938073843, - "loss": 3.588, - "step": 17640 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004768662526030523, - "loss": 3.6168, - "step": 17645 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004767997980992846, - "loss": 3.4792, - "step": 17650 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004767333303010781, - "loss": 3.4928, - "step": 17655 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004766668492134309, - "loss": 3.6081, - "step": 17660 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004766003548413421, - "loss": 3.5809, - "step": 17665 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047653384718981164, - "loss": 3.5654, - "step": 17670 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047646732626384066, - "loss": 3.4998, - "step": 17675 - }, - { - "epoch": 0.31, - "learning_rate": 0.000476400792068431, - "loss": 3.3947, - "step": 17680 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047633424460858585, - "loss": 3.6234, - "step": 17685 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004762676838893093, - "loss": 3.4851, - "step": 17690 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047620110991560623, - "loss": 3.4785, - "step": 17695 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047613452269248274, - "loss": 3.5703, - "step": 17700 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047606792222494566, - "loss": 3.5448, - "step": 17705 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004760013085180033, - "loss": 3.4537, - "step": 17710 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004759346815766646, - "loss": 3.515, - "step": 17715 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004758680414059393, - "loss": 3.5411, - "step": 17720 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004758013880108387, - "loss": 3.5154, - "step": 17725 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004757347213963747, - "loss": 3.4914, - "step": 17730 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004756680415675603, - "loss": 3.4521, - "step": 17735 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004756013485294094, - "loss": 3.5936, - "step": 17740 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004755346422869371, - "loss": 3.4493, - "step": 17745 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004754679228451592, - "loss": 3.5317, - "step": 17750 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047540119020909286, - "loss": 3.5307, - "step": 17755 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047533444438375587, - "loss": 3.5228, - "step": 17760 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004752676853741673, - "loss": 3.4958, - "step": 17765 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004752009131853469, - "loss": 3.4806, - "step": 17770 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004751341278223159, - "loss": 3.4653, - "step": 17775 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004750673292900959, - "loss": 3.5797, - "step": 17780 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047500051759371, - "loss": 3.5707, - "step": 17785 - }, - { - "epoch": 0.31, - "learning_rate": 0.000474933692738182, - "loss": 3.5677, - "step": 17790 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047486685472853687, - "loss": 3.5105, - "step": 17795 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004748000035698003, - "loss": 3.5625, - "step": 17800 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047473313926699954, - "loss": 3.5567, - "step": 17805 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047466626182516203, - "loss": 3.5687, - "step": 17810 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047459937124931685, - "loss": 3.5199, - "step": 17815 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004745324675444938, - "loss": 3.5174, - "step": 17820 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047446555071572354, - "loss": 3.4744, - "step": 17825 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004743986207680381, - "loss": 3.3464, - "step": 17830 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047433167770647, - "loss": 3.55, - "step": 17835 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004742647215360533, - "loss": 3.4027, - "step": 17840 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047419775226182264, - "loss": 3.5334, - "step": 17845 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004741307698888137, - "loss": 3.5083, - "step": 17850 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047406377442206325, - "loss": 3.628, - "step": 17855 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047399676586660914, - "loss": 3.6128, - "step": 17860 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004739297442274899, - "loss": 3.5811, - "step": 17865 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047386270950974525, - "loss": 3.5103, - "step": 17870 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004737956617184159, - "loss": 3.5447, - "step": 17875 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047372860085854345, - "loss": 3.6269, - "step": 17880 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004736615269351706, - "loss": 3.5275, - "step": 17885 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047359443995334083, - "loss": 3.4888, - "step": 17890 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047352733991809894, - "loss": 3.5246, - "step": 17895 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004734602268344903, - "loss": 3.4484, - "step": 17900 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004733931007075617, - "loss": 3.5838, - "step": 17905 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047332596154236046, - "loss": 3.5475, - "step": 17910 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047325880934393524, - "loss": 3.5593, - "step": 17915 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047319164411733545, - "loss": 3.6149, - "step": 17920 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047312446586761164, - "loss": 3.4426, - "step": 17925 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004730572745998153, - "loss": 3.524, - "step": 17930 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004729900703189988, - "loss": 3.5848, - "step": 17935 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047292285303021544, - "loss": 3.4916, - "step": 17940 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004728556227385199, - "loss": 3.5414, - "step": 17945 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004727883794489673, - "loss": 3.5673, - "step": 17950 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004727211231666141, - "loss": 3.5469, - "step": 17955 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004726538538965177, - "loss": 3.4563, - "step": 17960 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047258657164373624, - "loss": 3.512, - "step": 17965 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047251927641332915, - "loss": 3.481, - "step": 17970 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004724519682103566, - "loss": 3.5488, - "step": 17975 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004723846470398798, - "loss": 3.3891, - "step": 17980 - }, - { - "epoch": 0.31, - "learning_rate": 0.000472317312906961, - "loss": 3.533, - "step": 17985 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004722499658166634, - "loss": 3.6264, - "step": 17990 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047218260577405123, - "loss": 3.5065, - "step": 17995 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004721152327841894, - "loss": 3.5378, - "step": 18000 - }, - { - "epoch": 0.31, - "eval_loss": 3.560037136077881, - "eval_runtime": 149.8718, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 18000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047204784685214425, - "loss": 3.637, - "step": 18005 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047198044798298273, - "loss": 3.5406, - "step": 18010 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004719130361817728, - "loss": 3.5347, - "step": 18015 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047184561145358376, - "loss": 3.5563, - "step": 18020 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004717781738034853, - "loss": 3.6075, - "step": 18025 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047171072323654847, - "loss": 3.411, - "step": 18030 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004716432597578454, - "loss": 3.505, - "step": 18035 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047157578337244873, - "loss": 3.6218, - "step": 18040 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004715082940854326, - "loss": 3.4998, - "step": 18045 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004714407919018716, - "loss": 3.5515, - "step": 18050 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004713732768268417, - "loss": 3.6125, - "step": 18055 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004713057488654197, - "loss": 3.5094, - "step": 18060 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004712382080226833, - "loss": 3.5357, - "step": 18065 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004711706543037112, - "loss": 3.5171, - "step": 18070 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004711030877135831, - "loss": 3.5248, - "step": 18075 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004710355082573798, - "loss": 3.3577, - "step": 18080 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047096791594018273, - "loss": 3.4937, - "step": 18085 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004709003107670747, - "loss": 3.5847, - "step": 18090 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047083269274313897, - "loss": 3.5041, - "step": 18095 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004707650618734604, - "loss": 3.518, - "step": 18100 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004706974181631243, - "loss": 3.3611, - "step": 18105 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004706297616172172, - "loss": 3.5533, - "step": 18110 - }, - { - "epoch": 0.31, - "learning_rate": 0.00047056209224082636, - "loss": 3.4176, - "step": 18115 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004704944100390404, - "loss": 3.5398, - "step": 18120 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004704267150169485, - "loss": 3.4949, - "step": 18125 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004703590071796411, - "loss": 3.5204, - "step": 18130 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004702912865322095, - "loss": 3.4492, - "step": 18135 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004702235530797458, - "loss": 3.6095, - "step": 18140 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004701558068273433, - "loss": 3.4533, - "step": 18145 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004700880477800962, - "loss": 3.4747, - "step": 18150 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004700202759430995, - "loss": 3.6068, - "step": 18155 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004699524913214495, - "loss": 3.5339, - "step": 18160 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004698846939202431, - "loss": 3.5689, - "step": 18165 - }, - { - "epoch": 0.31, - "learning_rate": 0.00046981688374457835, - "loss": 3.4747, - "step": 18170 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004697490607995543, - "loss": 3.5512, - "step": 18175 - }, - { - "epoch": 0.31, - "learning_rate": 0.00046968122509027075, - "loss": 3.5927, - "step": 18180 - }, - { - "epoch": 0.31, - "learning_rate": 0.00046961337662182874, - "loss": 3.4706, - "step": 18185 - }, - { - "epoch": 0.31, - "learning_rate": 0.00046954551539933007, - "loss": 3.556, - "step": 18190 - }, - { - "epoch": 0.31, - "learning_rate": 0.0004694776414278775, - "loss": 3.4009, - "step": 18195 - }, - { - "epoch": 0.31, - "learning_rate": 0.00046940975471257483, - "loss": 3.4902, - "step": 18200 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046934185525852683, - "loss": 3.4386, - "step": 18205 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004692739430708392, - "loss": 3.4121, - "step": 18210 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004692060181546186, - "loss": 3.5268, - "step": 18215 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046913808051497253, - "loss": 3.5333, - "step": 18220 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046907013015700977, - "loss": 3.5135, - "step": 18225 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004690021670858395, - "loss": 3.4469, - "step": 18230 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004689341913065725, - "loss": 3.4252, - "step": 18235 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004688662028243201, - "loss": 3.5202, - "step": 18240 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046879820164419454, - "loss": 3.596, - "step": 18245 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046873018777130945, - "loss": 3.5158, - "step": 18250 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004686621612107789, - "loss": 3.4576, - "step": 18255 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046859412196771817, - "loss": 3.4635, - "step": 18260 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046852607004724354, - "loss": 3.5205, - "step": 18265 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004684580054544721, - "loss": 3.5507, - "step": 18270 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004683899281945221, - "loss": 3.4769, - "step": 18275 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046832183827251244, - "loss": 3.4863, - "step": 18280 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046825373569356305, - "loss": 3.5763, - "step": 18285 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004681856204627951, - "loss": 3.4605, - "step": 18290 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004681174925853305, - "loss": 3.4173, - "step": 18295 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046804935206629187, - "loss": 3.4477, - "step": 18300 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046798119891080343, - "loss": 3.4775, - "step": 18305 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004679130331239895, - "loss": 3.5202, - "step": 18310 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004678448547109761, - "loss": 3.4684, - "step": 18315 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046777666367688996, - "loss": 3.4995, - "step": 18320 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004677084600268584, - "loss": 3.4431, - "step": 18325 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004676402437660102, - "loss": 3.5776, - "step": 18330 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004675720148994747, - "loss": 3.5402, - "step": 18335 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046750377343238256, - "loss": 3.5223, - "step": 18340 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046743551936986513, - "loss": 3.332, - "step": 18345 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004673672527170547, - "loss": 3.5664, - "step": 18350 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046729897347908467, - "loss": 3.5212, - "step": 18355 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004672306816610892, - "loss": 3.4357, - "step": 18360 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004671623772682034, - "loss": 3.4875, - "step": 18365 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046709406030556364, - "loss": 3.6394, - "step": 18370 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046702573077830677, - "loss": 3.5538, - "step": 18375 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004669573886915711, - "loss": 3.4598, - "step": 18380 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004668890340504954, - "loss": 3.6014, - "step": 18385 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004668206668602196, - "loss": 3.5697, - "step": 18390 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004667522871258846, - "loss": 3.3325, - "step": 18395 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004666838948526321, - "loss": 3.5063, - "step": 18400 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046661549004560515, - "loss": 3.5601, - "step": 18405 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004665470727099471, - "loss": 3.5294, - "step": 18410 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046647864285080274, - "loss": 3.4877, - "step": 18415 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046641020047331764, - "loss": 3.5173, - "step": 18420 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004663417455826383, - "loss": 3.5529, - "step": 18425 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004662732781839122, - "loss": 3.4485, - "step": 18430 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046620479828228767, - "loss": 3.6021, - "step": 18435 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046613630588291413, - "loss": 3.5351, - "step": 18440 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004660678009909419, - "loss": 3.5565, - "step": 18445 - }, - { - "epoch": 0.32, - "learning_rate": 0.000465999283611522, - "loss": 3.5563, - "step": 18450 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004659307537498068, - "loss": 3.4886, - "step": 18455 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046586221141094923, - "loss": 3.5287, - "step": 18460 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004657936566001034, - "loss": 3.3976, - "step": 18465 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046572508932242424, - "loss": 3.5678, - "step": 18470 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046565650958306775, - "loss": 3.5914, - "step": 18475 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004655879173871908, - "loss": 3.4357, - "step": 18480 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046551931273995087, - "loss": 3.5462, - "step": 18485 - }, - { - "epoch": 0.32, - "learning_rate": 0.000465450695646507, - "loss": 3.6432, - "step": 18490 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004653820661120188, - "loss": 3.509, - "step": 18495 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004653134241416467, - "loss": 3.3627, - "step": 18500 - }, - { - "epoch": 0.32, - "eval_loss": 3.539888858795166, - "eval_runtime": 149.9774, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 18500 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004652447697405524, - "loss": 3.4926, - "step": 18505 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046517610291389825, - "loss": 3.4943, - "step": 18510 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004651074236668477, - "loss": 3.5992, - "step": 18515 - }, - { - "epoch": 0.32, - "learning_rate": 0.000465038732004565, - "loss": 3.4454, - "step": 18520 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046497002793221545, - "loss": 3.4608, - "step": 18525 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004649013114549653, - "loss": 3.426, - "step": 18530 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004648325825779817, - "loss": 3.5191, - "step": 18535 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046476384130643254, - "loss": 3.6492, - "step": 18540 - }, - { - "epoch": 0.32, - "learning_rate": 0.000464695087645487, - "loss": 3.4617, - "step": 18545 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046462632160031484, - "loss": 3.506, - "step": 18550 - }, - { - "epoch": 0.32, - "learning_rate": 0.000464557543176087, - "loss": 3.5133, - "step": 18555 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004644887523779753, - "loss": 3.5006, - "step": 18560 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004644199492111524, - "loss": 3.2805, - "step": 18565 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046435113368079197, - "loss": 3.5573, - "step": 18570 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046428230579206853, - "loss": 3.5132, - "step": 18575 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004642134655501576, - "loss": 3.661, - "step": 18580 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004641446129602358, - "loss": 3.3854, - "step": 18585 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046407574802748017, - "loss": 3.5102, - "step": 18590 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046400687075706925, - "loss": 3.4589, - "step": 18595 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004639379811541821, - "loss": 3.4866, - "step": 18600 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046386907922399894, - "loss": 3.5285, - "step": 18605 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046380016497170096, - "loss": 3.5076, - "step": 18610 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004637312384024699, - "loss": 3.4932, - "step": 18615 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004636622995214889, - "loss": 3.4528, - "step": 18620 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004635933483339417, - "loss": 3.586, - "step": 18625 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004635243848450131, - "loss": 3.4847, - "step": 18630 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004634554090598888, - "loss": 3.525, - "step": 18635 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046338642098375544, - "loss": 3.4765, - "step": 18640 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004633174206218006, - "loss": 3.577, - "step": 18645 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004632484079792127, - "loss": 3.4143, - "step": 18650 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046317938306118117, - "loss": 3.5152, - "step": 18655 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004631103458728963, - "loss": 3.4873, - "step": 18660 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004630412964195493, - "loss": 3.4814, - "step": 18665 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046297223470633247, - "loss": 3.5824, - "step": 18670 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004629031607384388, - "loss": 3.4629, - "step": 18675 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046283407452106225, - "loss": 3.5677, - "step": 18680 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046276497605939786, - "loss": 3.4402, - "step": 18685 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004626958653586414, - "loss": 3.4675, - "step": 18690 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004626267424239896, - "loss": 3.4872, - "step": 18695 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004625576072606403, - "loss": 3.4781, - "step": 18700 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046248845987379204, - "loss": 3.5359, - "step": 18705 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046241930026864425, - "loss": 3.5084, - "step": 18710 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004623501284503976, - "loss": 3.4936, - "step": 18715 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004622809444242533, - "loss": 3.5277, - "step": 18720 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004622117481954135, - "loss": 3.5329, - "step": 18725 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046214253976908167, - "loss": 3.459, - "step": 18730 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046207331915046183, - "loss": 3.5331, - "step": 18735 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046200408634475894, - "loss": 3.4192, - "step": 18740 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046193484135717913, - "loss": 3.4791, - "step": 18745 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004618655841929291, - "loss": 3.4814, - "step": 18750 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004617963148572167, - "loss": 3.4538, - "step": 18755 - }, - { - "epoch": 0.32, - "learning_rate": 0.00046172703335525047, - "loss": 3.4978, - "step": 18760 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004616577396922403, - "loss": 3.5329, - "step": 18765 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004615884338733966, - "loss": 3.5177, - "step": 18770 - }, - { - "epoch": 0.32, - "learning_rate": 0.0004615191159039308, - "loss": 3.5178, - "step": 18775 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004614497857890552, - "loss": 3.5395, - "step": 18780 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004613804435339832, - "loss": 3.5156, - "step": 18785 - }, - { - "epoch": 0.33, - "learning_rate": 0.00046131108914392884, - "loss": 3.4949, - "step": 18790 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004612417226241073, - "loss": 3.4067, - "step": 18795 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004611723439797346, - "loss": 3.4388, - "step": 18800 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004611029532160276, - "loss": 3.5248, - "step": 18805 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004610335503382041, - "loss": 3.5521, - "step": 18810 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004609641353514829, - "loss": 3.5943, - "step": 18815 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004608947082610838, - "loss": 3.5184, - "step": 18820 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004608252690722271, - "loss": 3.4862, - "step": 18825 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004607558177901344, - "loss": 3.6086, - "step": 18830 - }, - { - "epoch": 0.33, - "learning_rate": 0.000460686354420028, - "loss": 3.4269, - "step": 18835 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004606168789671314, - "loss": 3.583, - "step": 18840 - }, - { - "epoch": 0.33, - "learning_rate": 0.00046054739143666857, - "loss": 3.4697, - "step": 18845 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004604778918338647, - "loss": 3.5615, - "step": 18850 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004604083801639457, - "loss": 3.5213, - "step": 18855 - }, - { - "epoch": 0.33, - "learning_rate": 0.00046033885643213866, - "loss": 3.443, - "step": 18860 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004602693206436714, - "loss": 3.4458, - "step": 18865 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004601997728037726, - "loss": 3.4955, - "step": 18870 - }, - { - "epoch": 0.33, - "learning_rate": 0.00046013021291767183, - "loss": 3.5146, - "step": 18875 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004600606409905997, - "loss": 3.5533, - "step": 18880 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045999105702778775, - "loss": 3.415, - "step": 18885 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004599214610344682, - "loss": 3.4769, - "step": 18890 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045985185301587435, - "loss": 3.4555, - "step": 18895 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004597822329772403, - "loss": 3.4481, - "step": 18900 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045971260092380137, - "loss": 3.391, - "step": 18905 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045964295686079325, - "loss": 3.3999, - "step": 18910 - }, - { - "epoch": 0.33, - "learning_rate": 0.000459573300793453, - "loss": 3.5864, - "step": 18915 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045950363272701824, - "loss": 3.5394, - "step": 18920 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045943395266672787, - "loss": 3.5128, - "step": 18925 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045936426061782135, - "loss": 3.5267, - "step": 18930 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045929455658553895, - "loss": 3.4345, - "step": 18935 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045922484057512247, - "loss": 3.6213, - "step": 18940 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045915511259181385, - "loss": 3.5013, - "step": 18945 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045908537264085657, - "loss": 3.561, - "step": 18950 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004590156207274945, - "loss": 3.5136, - "step": 18955 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045894585685697263, - "loss": 3.4877, - "step": 18960 - }, - { - "epoch": 0.33, - "learning_rate": 0.000458876081034537, - "loss": 3.4736, - "step": 18965 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004588062932654343, - "loss": 3.5516, - "step": 18970 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045873649355491217, - "loss": 3.4, - "step": 18975 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004586666819082192, - "loss": 3.5071, - "step": 18980 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004585968583306049, - "loss": 3.5604, - "step": 18985 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045852702282731976, - "loss": 3.5457, - "step": 18990 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004584571754036149, - "loss": 3.4559, - "step": 18995 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004583873160647425, - "loss": 3.6102, - "step": 19000 - }, - { - "epoch": 0.33, - "eval_loss": 3.5276401042938232, - "eval_runtime": 149.8717, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 19000 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004583174448159557, - "loss": 3.4795, - "step": 19005 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004582475616625084, - "loss": 3.4671, - "step": 19010 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045817766660965543, - "loss": 3.5596, - "step": 19015 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004581077596626526, - "loss": 3.6208, - "step": 19020 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004580378408267565, - "loss": 3.5096, - "step": 19025 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045796791010722487, - "loss": 3.4556, - "step": 19030 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045789796750931574, - "loss": 3.4603, - "step": 19035 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045782801303828873, - "loss": 3.4485, - "step": 19040 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004577580466994041, - "loss": 3.4624, - "step": 19045 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045768806849792276, - "loss": 3.4523, - "step": 19050 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045761807843910685, - "loss": 3.5471, - "step": 19055 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004575480765282192, - "loss": 3.3223, - "step": 19060 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045747806277052363, - "loss": 3.4825, - "step": 19065 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045740803717128484, - "loss": 3.3537, - "step": 19070 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004573379997357682, - "loss": 3.3989, - "step": 19075 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004572679504692405, - "loss": 3.5855, - "step": 19080 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045719788937696883, - "loss": 3.5294, - "step": 19085 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004571278164642215, - "loss": 3.4447, - "step": 19090 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004570577317362677, - "loss": 3.6258, - "step": 19095 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045698763519837725, - "loss": 3.4541, - "step": 19100 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045691752685582133, - "loss": 3.5693, - "step": 19105 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004568474067138716, - "loss": 3.4704, - "step": 19110 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045677727477780056, - "loss": 3.5682, - "step": 19115 - }, - { - "epoch": 0.33, - "learning_rate": 0.000456707131052882, - "loss": 3.4419, - "step": 19120 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045663697554439034, - "loss": 3.4335, - "step": 19125 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004565668082576008, - "loss": 3.5477, - "step": 19130 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045649662919778976, - "loss": 3.5055, - "step": 19135 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045642643837023406, - "loss": 3.6236, - "step": 19140 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004563562357802121, - "loss": 3.5487, - "step": 19145 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045628602143300243, - "loss": 3.4841, - "step": 19150 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004562157953338848, - "loss": 3.4994, - "step": 19155 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045614555748814005, - "loss": 3.4607, - "step": 19160 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045607530790104956, - "loss": 3.4797, - "step": 19165 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045600504657789594, - "loss": 3.5536, - "step": 19170 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045593477352396213, - "loss": 3.5152, - "step": 19175 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004558644887445326, - "loss": 3.5431, - "step": 19180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004557941922448923, - "loss": 3.4514, - "step": 19185 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045572388403032724, - "loss": 3.3985, - "step": 19190 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004556535641061241, - "loss": 3.4395, - "step": 19195 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045558323247757063, - "loss": 3.4796, - "step": 19200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045551288914995536, - "loss": 3.4244, - "step": 19205 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045544253412856806, - "loss": 3.4544, - "step": 19210 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045537216741869865, - "loss": 3.5251, - "step": 19215 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045530178902563847, - "loss": 3.5224, - "step": 19220 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004552313989546798, - "loss": 3.2957, - "step": 19225 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004551609972111154, - "loss": 3.4405, - "step": 19230 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004550905838002392, - "loss": 3.4844, - "step": 19235 - }, - { - "epoch": 0.33, - "learning_rate": 0.000455020158727346, - "loss": 3.549, - "step": 19240 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004549497219977312, - "loss": 3.5311, - "step": 19245 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004548792736166915, - "loss": 3.4819, - "step": 19250 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045480881358952416, - "loss": 3.492, - "step": 19255 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045473834192152744, - "loss": 3.4925, - "step": 19260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045466785861800034, - "loss": 3.4899, - "step": 19265 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045459736368424304, - "loss": 3.5294, - "step": 19270 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004545268571255563, - "loss": 3.4631, - "step": 19275 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004544563389472417, - "loss": 3.436, - "step": 19280 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004543858091546021, - "loss": 3.4474, - "step": 19285 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045431526775294086, - "loss": 3.4655, - "step": 19290 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045424471474756227, - "loss": 3.4941, - "step": 19295 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045417415014377166, - "loss": 3.5932, - "step": 19300 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004541035739468751, - "loss": 3.3979, - "step": 19305 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045403298616217963, - "loss": 3.4772, - "step": 19310 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045396238679499293, - "loss": 3.5518, - "step": 19315 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004538917758506237, - "loss": 3.4183, - "step": 19320 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004538211533343817, - "loss": 3.5486, - "step": 19325 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004537505192515773, - "loss": 3.3749, - "step": 19330 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045367987360752174, - "loss": 3.4757, - "step": 19335 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045360921640752737, - "loss": 3.4974, - "step": 19340 - }, - { - "epoch": 0.33, - "learning_rate": 0.0004535385476569071, - "loss": 3.4206, - "step": 19345 - }, - { - "epoch": 0.33, - "learning_rate": 0.000453467867360975, - "loss": 3.5497, - "step": 19350 - }, - { - "epoch": 0.33, - "learning_rate": 0.00045339717552504564, - "loss": 3.4373, - "step": 19355 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004533264721544349, - "loss": 3.4262, - "step": 19360 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004532557572544593, - "loss": 3.5594, - "step": 19365 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004531850308304361, - "loss": 3.4997, - "step": 19370 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045311429288768374, - "loss": 3.464, - "step": 19375 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045304354343152117, - "loss": 3.4962, - "step": 19380 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045297278246726844, - "loss": 3.5346, - "step": 19385 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045290201000024654, - "loss": 3.5029, - "step": 19390 - }, - { - "epoch": 0.34, - "learning_rate": 0.000452831226035777, - "loss": 3.5546, - "step": 19395 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045276043057918263, - "loss": 3.4901, - "step": 19400 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004526896236357866, - "loss": 3.4695, - "step": 19405 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004526188052109135, - "loss": 3.5047, - "step": 19410 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045254797530988844, - "loss": 3.451, - "step": 19415 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004524771339380374, - "loss": 3.436, - "step": 19420 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045240628110068725, - "loss": 3.5381, - "step": 19425 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045233541680316593, - "loss": 3.4288, - "step": 19430 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004522645410508019, - "loss": 3.5495, - "step": 19435 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004521936538489248, - "loss": 3.4837, - "step": 19440 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045212275520286477, - "loss": 3.4782, - "step": 19445 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045205184511795335, - "loss": 3.4985, - "step": 19450 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004519809235995223, - "loss": 3.535, - "step": 19455 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004519099906529047, - "loss": 3.5755, - "step": 19460 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045183904628343444, - "loss": 3.5931, - "step": 19465 - }, - { - "epoch": 0.34, - "learning_rate": 0.000451768090496446, - "loss": 3.5447, - "step": 19470 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004516971232972749, - "loss": 3.435, - "step": 19475 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004516261446912576, - "loss": 3.43, - "step": 19480 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004515551546837313, - "loss": 3.4134, - "step": 19485 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004514841532800341, - "loss": 3.393, - "step": 19490 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045141314048550486, - "loss": 3.4855, - "step": 19495 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004513421163054834, - "loss": 3.4721, - "step": 19500 - }, - { - "epoch": 0.34, - "eval_loss": 3.5094563961029053, - "eval_runtime": 149.8708, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 19500 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045127108074531045, - "loss": 3.4603, - "step": 19505 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045120003381032755, - "loss": 3.3118, - "step": 19510 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045112897550587705, - "loss": 3.4607, - "step": 19515 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004510579058373019, - "loss": 3.4662, - "step": 19520 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004509868248099465, - "loss": 3.5243, - "step": 19525 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004509157324291557, - "loss": 3.4312, - "step": 19530 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004508446287002752, - "loss": 3.3722, - "step": 19535 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004507735136286517, - "loss": 3.3688, - "step": 19540 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045070238721963264, - "loss": 3.3627, - "step": 19545 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004506312494785665, - "loss": 3.4587, - "step": 19550 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045056010041080236, - "loss": 3.3304, - "step": 19555 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004504889400216902, - "loss": 3.4772, - "step": 19560 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045041776831658103, - "loss": 3.4572, - "step": 19565 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004503465853008266, - "loss": 3.504, - "step": 19570 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004502753909797794, - "loss": 3.4871, - "step": 19575 - }, - { - "epoch": 0.34, - "learning_rate": 0.00045020418535879293, - "loss": 3.5595, - "step": 19580 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004501329684432215, - "loss": 3.5133, - "step": 19585 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004500617402384204, - "loss": 3.4042, - "step": 19590 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004499905007497454, - "loss": 3.6127, - "step": 19595 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044991924998255344, - "loss": 3.344, - "step": 19600 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004498479879422022, - "loss": 3.4879, - "step": 19605 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004497767146340502, - "loss": 3.5619, - "step": 19610 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044970543006345686, - "loss": 3.4435, - "step": 19615 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044963413423578245, - "loss": 3.5145, - "step": 19620 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044956282715638796, - "loss": 3.5095, - "step": 19625 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044949150883063545, - "loss": 3.5551, - "step": 19630 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044942017926388755, - "loss": 3.5651, - "step": 19635 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044934883846150793, - "loss": 3.3975, - "step": 19640 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004492774864288611, - "loss": 3.5149, - "step": 19645 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044920612317131224, - "loss": 3.4926, - "step": 19650 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004491347486942277, - "loss": 3.3984, - "step": 19655 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044906336300297423, - "loss": 3.3962, - "step": 19660 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004489919661029198, - "loss": 3.4718, - "step": 19665 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044892055799943323, - "loss": 3.4969, - "step": 19670 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044884913869788377, - "loss": 3.3791, - "step": 19675 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004487777082036421, - "loss": 3.4044, - "step": 19680 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044870626652207903, - "loss": 3.4759, - "step": 19685 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004486348136585669, - "loss": 3.4839, - "step": 19690 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044856334961847867, - "loss": 3.5427, - "step": 19695 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044849187440718774, - "loss": 3.5028, - "step": 19700 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004484203880300689, - "loss": 3.5073, - "step": 19705 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044834889049249755, - "loss": 3.4769, - "step": 19710 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004482773817998499, - "loss": 3.5275, - "step": 19715 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004482058619575031, - "loss": 3.5522, - "step": 19720 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044813433097083493, - "loss": 3.5725, - "step": 19725 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004480627888452243, - "loss": 3.3296, - "step": 19730 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004479912355860508, - "loss": 3.575, - "step": 19735 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044791967119869473, - "loss": 3.485, - "step": 19740 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044784809568853754, - "loss": 3.5094, - "step": 19745 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004477765090609612, - "loss": 3.4432, - "step": 19750 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004477049113213488, - "loss": 3.4815, - "step": 19755 - }, - { - "epoch": 0.34, - "learning_rate": 0.000447633302475084, - "loss": 3.5307, - "step": 19760 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004475616825275514, - "loss": 3.479, - "step": 19765 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004474900514841366, - "loss": 3.4396, - "step": 19770 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004474184093502258, - "loss": 3.5377, - "step": 19775 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004473467561312061, - "loss": 3.5034, - "step": 19780 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044727509183246553, - "loss": 3.4338, - "step": 19785 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004472034164593927, - "loss": 3.492, - "step": 19790 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004471317300173776, - "loss": 3.478, - "step": 19795 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004470600325118102, - "loss": 3.5671, - "step": 19800 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044698832394808206, - "loss": 3.3984, - "step": 19805 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004469166043315853, - "loss": 3.5496, - "step": 19810 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004468448736677129, - "loss": 3.4328, - "step": 19815 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004467731319618585, - "loss": 3.4768, - "step": 19820 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004467013792194168, - "loss": 3.4958, - "step": 19825 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044662961544578313, - "loss": 3.4901, - "step": 19830 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044655784064635406, - "loss": 3.4205, - "step": 19835 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044648605482652625, - "loss": 3.5096, - "step": 19840 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004464142579916979, - "loss": 3.5586, - "step": 19845 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044634245014726773, - "loss": 3.5259, - "step": 19850 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044627063129863535, - "loss": 3.396, - "step": 19855 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004461988014512011, - "loss": 3.4005, - "step": 19860 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044612696061036613, - "loss": 3.4798, - "step": 19865 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044605510878153277, - "loss": 3.5662, - "step": 19870 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044598324597010366, - "loss": 3.4116, - "step": 19875 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004459113721814826, - "loss": 3.4119, - "step": 19880 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044583948742107416, - "loss": 3.4519, - "step": 19885 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044576759169428365, - "loss": 3.4095, - "step": 19890 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004456956850065173, - "loss": 3.5294, - "step": 19895 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004456237673631822, - "loss": 3.3993, - "step": 19900 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044555183876968595, - "loss": 3.4356, - "step": 19905 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004454798992314375, - "loss": 3.4959, - "step": 19910 - }, - { - "epoch": 0.34, - "learning_rate": 0.0004454079487538462, - "loss": 3.5206, - "step": 19915 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044533598734232226, - "loss": 3.3217, - "step": 19920 - }, - { - "epoch": 0.34, - "learning_rate": 0.000445264015002277, - "loss": 3.486, - "step": 19925 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044519203173912214, - "loss": 3.5203, - "step": 19930 - }, - { - "epoch": 0.34, - "learning_rate": 0.00044512003755827075, - "loss": 3.4889, - "step": 19935 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004450480324651362, - "loss": 3.4719, - "step": 19940 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044497601646513294, - "loss": 3.4491, - "step": 19945 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044490398956367635, - "loss": 3.4391, - "step": 19950 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004448319517661823, - "loss": 3.3457, - "step": 19955 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044475990307806765, - "loss": 3.3828, - "step": 19960 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044468784350475024, - "loss": 3.4094, - "step": 19965 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004446157730516485, - "loss": 3.3578, - "step": 19970 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044454369172418183, - "loss": 3.4901, - "step": 19975 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004444715995277702, - "loss": 3.4578, - "step": 19980 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004443994964678347, - "loss": 3.5705, - "step": 19985 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004443273825497972, - "loss": 3.356, - "step": 19990 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044425525777908015, - "loss": 3.5245, - "step": 19995 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044418312216110703, - "loss": 3.506, - "step": 20000 - }, - { - "epoch": 0.35, - "eval_loss": 3.5002212524414062, - "eval_runtime": 149.672, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 20000 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044411097570130204, - "loss": 3.4232, - "step": 20005 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004440388184050902, - "loss": 3.4982, - "step": 20010 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044396665027789746, - "loss": 3.4513, - "step": 20015 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044389447132515037, - "loss": 3.3997, - "step": 20020 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004438222815522765, - "loss": 3.5083, - "step": 20025 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004437500809647042, - "loss": 3.4299, - "step": 20030 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004436778695678625, - "loss": 3.4987, - "step": 20035 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004436056473671813, - "loss": 3.5377, - "step": 20040 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004435334143680914, - "loss": 3.5088, - "step": 20045 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044346117057602425, - "loss": 3.4873, - "step": 20050 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004433889159964125, - "loss": 3.474, - "step": 20055 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044331665063468894, - "loss": 3.5821, - "step": 20060 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044324437449628785, - "loss": 3.5429, - "step": 20065 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044317208758664386, - "loss": 3.2625, - "step": 20070 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044309978991119263, - "loss": 3.3802, - "step": 20075 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004430274814753706, - "loss": 3.5245, - "step": 20080 - }, - { - "epoch": 0.35, - "learning_rate": 0.000442955162284615, - "loss": 3.5747, - "step": 20085 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004428828323443638, - "loss": 3.4744, - "step": 20090 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044281049166005585, - "loss": 3.5422, - "step": 20095 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004427381402371308, - "loss": 3.3606, - "step": 20100 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044266577808102915, - "loss": 3.4586, - "step": 20105 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004425934051971921, - "loss": 3.4529, - "step": 20110 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044252102159106187, - "loss": 3.4736, - "step": 20115 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044244862726808114, - "loss": 3.415, - "step": 20120 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004423762222336936, - "loss": 3.4178, - "step": 20125 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044230380649334396, - "loss": 3.4815, - "step": 20130 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004422313800524773, - "loss": 3.4722, - "step": 20135 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004421589429165399, - "loss": 3.5723, - "step": 20140 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044208649509097837, - "loss": 3.4964, - "step": 20145 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044201403658124055, - "loss": 3.3457, - "step": 20150 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044194156739277524, - "loss": 3.5067, - "step": 20155 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044186908753103137, - "loss": 3.4389, - "step": 20160 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004417965970014591, - "loss": 3.3161, - "step": 20165 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044172409580950947, - "loss": 3.5093, - "step": 20170 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004416515839606343, - "loss": 3.3348, - "step": 20175 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044157906146028586, - "loss": 3.4568, - "step": 20180 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044150652831391755, - "loss": 3.4137, - "step": 20185 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004414339845269835, - "loss": 3.3819, - "step": 20190 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044136143010493884, - "loss": 3.5321, - "step": 20195 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044128886505323897, - "loss": 3.458, - "step": 20200 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044121628937734053, - "loss": 3.4455, - "step": 20205 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004411437030827008, - "loss": 3.4663, - "step": 20210 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004410711061747781, - "loss": 3.5031, - "step": 20215 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044099849865903107, - "loss": 3.526, - "step": 20220 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004409258805409196, - "loss": 3.4007, - "step": 20225 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004408532518259041, - "loss": 3.4619, - "step": 20230 - }, - { - "epoch": 0.35, - "learning_rate": 0.000440780612519446, - "loss": 3.4583, - "step": 20235 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004407079626270072, - "loss": 3.4842, - "step": 20240 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004406353021540509, - "loss": 3.5053, - "step": 20245 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044056263110604045, - "loss": 3.5203, - "step": 20250 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044048994948844063, - "loss": 3.5216, - "step": 20255 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004404172573067166, - "loss": 3.4012, - "step": 20260 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004403445545663344, - "loss": 3.4549, - "step": 20265 - }, - { - "epoch": 0.35, - "learning_rate": 0.000440271841272761, - "loss": 3.3967, - "step": 20270 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044019911743146397, - "loss": 3.4056, - "step": 20275 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004401263830479118, - "loss": 3.5852, - "step": 20280 - }, - { - "epoch": 0.35, - "learning_rate": 0.00044005363812757377, - "loss": 3.5015, - "step": 20285 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043998088267591996, - "loss": 3.5004, - "step": 20290 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043990811669842114, - "loss": 3.4759, - "step": 20295 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043983534020054907, - "loss": 3.4747, - "step": 20300 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043976255318777587, - "loss": 3.4997, - "step": 20305 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004396897556655751, - "loss": 3.4021, - "step": 20310 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043961694763942053, - "loss": 3.5991, - "step": 20315 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043954412911478705, - "loss": 3.4963, - "step": 20320 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004394713000971501, - "loss": 3.3867, - "step": 20325 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004393984605919863, - "loss": 3.493, - "step": 20330 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004393256106047726, - "loss": 3.4838, - "step": 20335 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004392527501409871, - "loss": 3.4146, - "step": 20340 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004391798792061084, - "loss": 3.4319, - "step": 20345 - }, - { - "epoch": 0.35, - "learning_rate": 0.000439106997805616, - "loss": 3.3523, - "step": 20350 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004390341059449903, - "loss": 3.4308, - "step": 20355 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043896120362971246, - "loss": 3.3949, - "step": 20360 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004388882908652642, - "loss": 3.5024, - "step": 20365 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004388153676571283, - "loss": 3.4641, - "step": 20370 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043874243401078813, - "loss": 3.4261, - "step": 20375 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043866948993172793, - "loss": 3.4187, - "step": 20380 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043859653542543286, - "loss": 3.4059, - "step": 20385 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043852357049738855, - "loss": 3.381, - "step": 20390 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004384505951530817, - "loss": 3.5046, - "step": 20395 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004383776093979996, - "loss": 3.4859, - "step": 20400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043830461323763047, - "loss": 3.5565, - "step": 20405 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004382316066774633, - "loss": 3.4298, - "step": 20410 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043815858972298763, - "loss": 3.4938, - "step": 20415 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043808556237969403, - "loss": 3.5072, - "step": 20420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043801252465307396, - "loss": 3.5391, - "step": 20425 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004379394765486192, - "loss": 3.4818, - "step": 20430 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004378664180718228, - "loss": 3.4382, - "step": 20435 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004377933492281783, - "loss": 3.5275, - "step": 20440 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004377202700231801, - "loss": 3.4213, - "step": 20445 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043764718046232346, - "loss": 3.359, - "step": 20450 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004375740805511043, - "loss": 3.5426, - "step": 20455 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043750097029501925, - "loss": 3.4999, - "step": 20460 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004374278496995659, - "loss": 3.4504, - "step": 20465 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004373547187702426, - "loss": 3.4479, - "step": 20470 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043728157751254834, - "loss": 3.4309, - "step": 20475 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004372084259319831, - "loss": 3.468, - "step": 20480 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043713526403404746, - "loss": 3.4853, - "step": 20485 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004370620918242426, - "loss": 3.4808, - "step": 20490 - }, - { - "epoch": 0.35, - "learning_rate": 0.000436988909308071, - "loss": 3.4663, - "step": 20495 - }, - { - "epoch": 0.35, - "learning_rate": 0.0004369157164910354, - "loss": 3.3601, - "step": 20500 - }, - { - "epoch": 0.35, - "eval_loss": 3.4890780448913574, - "eval_runtime": 149.7679, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 20500 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043684251337863957, - "loss": 3.503, - "step": 20505 - }, - { - "epoch": 0.35, - "learning_rate": 0.00043676929997638807, - "loss": 3.4588, - "step": 20510 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043669607628978616, - "loss": 3.5404, - "step": 20515 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043662284232433995, - "loss": 3.4366, - "step": 20520 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004365495980855561, - "loss": 3.3995, - "step": 20525 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043647634357894225, - "loss": 3.3957, - "step": 20530 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004364030788100069, - "loss": 3.5176, - "step": 20535 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043632980378425894, - "loss": 3.5703, - "step": 20540 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043625651850720846, - "loss": 3.4949, - "step": 20545 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043618322298436614, - "loss": 3.4727, - "step": 20550 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004361099172212433, - "loss": 3.4264, - "step": 20555 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004360366012233523, - "loss": 3.4806, - "step": 20560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043596327499620613, - "loss": 3.4705, - "step": 20565 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043588993854531835, - "loss": 3.4814, - "step": 20570 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004358165918762037, - "loss": 3.4198, - "step": 20575 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043574323499437736, - "loss": 3.3648, - "step": 20580 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004356698679053554, - "loss": 3.5215, - "step": 20585 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004355964906146547, - "loss": 3.4305, - "step": 20590 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004355231031277928, - "loss": 3.3691, - "step": 20595 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004354497054502882, - "loss": 3.4175, - "step": 20600 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043537629758765977, - "loss": 3.4657, - "step": 20605 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004353028795454275, - "loss": 3.5085, - "step": 20610 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043522945132911235, - "loss": 3.4169, - "step": 20615 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004351560129442354, - "loss": 3.4685, - "step": 20620 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043508256439631884, - "loss": 3.5605, - "step": 20625 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043500910569088577, - "loss": 3.4576, - "step": 20630 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004349356368334599, - "loss": 3.5073, - "step": 20635 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004348621578295657, - "loss": 3.456, - "step": 20640 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043478866868472833, - "loss": 3.3731, - "step": 20645 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004347151694044738, - "loss": 3.4255, - "step": 20650 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043464165999432903, - "loss": 3.4228, - "step": 20655 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004345681404598215, - "loss": 3.4786, - "step": 20660 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004344946108064794, - "loss": 3.4763, - "step": 20665 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004344210710398318, - "loss": 3.4635, - "step": 20670 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043434752116540854, - "loss": 3.4831, - "step": 20675 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004342739611887403, - "loss": 3.4503, - "step": 20680 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004342003911153583, - "loss": 3.4472, - "step": 20685 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043412681095079465, - "loss": 3.346, - "step": 20690 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043405322070058216, - "loss": 3.3185, - "step": 20695 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004339796203702545, - "loss": 3.5082, - "step": 20700 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043390600996534606, - "loss": 3.5576, - "step": 20705 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004338323894913919, - "loss": 3.4343, - "step": 20710 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043375875895392793, - "loss": 3.5085, - "step": 20715 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004336851183584908, - "loss": 3.3682, - "step": 20720 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004336114677106179, - "loss": 3.419, - "step": 20725 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004335378070158474, - "loss": 3.5034, - "step": 20730 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043346413627971803, - "loss": 3.4361, - "step": 20735 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043339045550776976, - "loss": 3.402, - "step": 20740 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043331676470554286, - "loss": 3.4727, - "step": 20745 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004332430638785784, - "loss": 3.5097, - "step": 20750 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004331693530324184, - "loss": 3.4251, - "step": 20755 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043309563217260564, - "loss": 3.4258, - "step": 20760 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004330219013046834, - "loss": 3.3033, - "step": 20765 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004329481604341959, - "loss": 3.4339, - "step": 20770 - }, - { - "epoch": 0.36, - "learning_rate": 0.000432874409566688, - "loss": 3.4677, - "step": 20775 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004328006487077056, - "loss": 3.3985, - "step": 20780 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043272687786279497, - "loss": 3.4579, - "step": 20785 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043265309703750327, - "loss": 3.3846, - "step": 20790 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004325793062373786, - "loss": 3.5574, - "step": 20795 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043250550546796953, - "loss": 3.4578, - "step": 20800 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004324316947348255, - "loss": 3.4623, - "step": 20805 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004323578740434967, - "loss": 3.5179, - "step": 20810 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004322840433995342, - "loss": 3.459, - "step": 20815 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004322102028084896, - "loss": 3.478, - "step": 20820 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043213635227591516, - "loss": 3.3421, - "step": 20825 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004320624918073643, - "loss": 3.3589, - "step": 20830 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004319886214083908, - "loss": 3.4584, - "step": 20835 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004319147410845495, - "loss": 3.3751, - "step": 20840 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004318408508413957, - "loss": 3.6159, - "step": 20845 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043176695068448555, - "loss": 3.3952, - "step": 20850 - }, - { - "epoch": 0.36, - "learning_rate": 0.000431693040619376, - "loss": 3.5016, - "step": 20855 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004316191206516248, - "loss": 3.412, - "step": 20860 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004315451907867902, - "loss": 3.5053, - "step": 20865 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043147125103043147, - "loss": 3.4214, - "step": 20870 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043139730138810835, - "loss": 3.4585, - "step": 20875 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004313233418653816, - "loss": 3.3709, - "step": 20880 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043124937246781254, - "loss": 3.419, - "step": 20885 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043117539320096333, - "loss": 3.4385, - "step": 20890 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043110140407039686, - "loss": 3.4396, - "step": 20895 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004310274050816767, - "loss": 3.3851, - "step": 20900 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043095339624036705, - "loss": 3.4022, - "step": 20905 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004308793775520333, - "loss": 3.4697, - "step": 20910 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043080534902224094, - "loss": 3.3228, - "step": 20915 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043073131065655685, - "loss": 3.4832, - "step": 20920 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004306572624605481, - "loss": 3.4429, - "step": 20925 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004305832044397828, - "loss": 3.3656, - "step": 20930 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043050913659982984, - "loss": 3.4722, - "step": 20935 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043043505894625864, - "loss": 3.4771, - "step": 20940 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004303609714846394, - "loss": 3.4003, - "step": 20945 - }, - { - "epoch": 0.36, - "learning_rate": 0.00043028687422054317, - "loss": 3.5443, - "step": 20950 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004302127671595418, - "loss": 3.4675, - "step": 20955 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004301386503072077, - "loss": 3.4031, - "step": 20960 - }, - { - "epoch": 0.36, - "learning_rate": 0.000430064523669114, - "loss": 3.3901, - "step": 20965 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004299903872508346, - "loss": 3.4691, - "step": 20970 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042991624105794435, - "loss": 3.4782, - "step": 20975 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004298420850960186, - "loss": 3.481, - "step": 20980 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004297679193706335, - "loss": 3.4205, - "step": 20985 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042969374388736586, - "loss": 3.4272, - "step": 20990 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004296195586517933, - "loss": 3.4391, - "step": 20995 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042954536366949435, - "loss": 3.5532, - "step": 21000 - }, - { - "epoch": 0.36, - "eval_loss": 3.4810240268707275, - "eval_runtime": 149.6804, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 21000 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004294711589460479, - "loss": 3.5336, - "step": 21005 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042939694448703385, - "loss": 3.5589, - "step": 21010 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004293227202980327, - "loss": 3.3975, - "step": 21015 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004292484863846257, - "loss": 3.4718, - "step": 21020 - }, - { - "epoch": 0.36, - "learning_rate": 0.000429174242752395, - "loss": 3.4397, - "step": 21025 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004290999894069232, - "loss": 3.5044, - "step": 21030 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004290257263537938, - "loss": 3.4743, - "step": 21035 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004289514535985911, - "loss": 3.4612, - "step": 21040 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004288771711468999, - "loss": 3.5065, - "step": 21045 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004288028790043059, - "loss": 3.3358, - "step": 21050 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042872857717639556, - "loss": 3.4931, - "step": 21055 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004286542656687559, - "loss": 3.4695, - "step": 21060 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042857994448697485, - "loss": 3.3413, - "step": 21065 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004285056136366409, - "loss": 3.497, - "step": 21070 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042843127312334345, - "loss": 3.4309, - "step": 21075 - }, - { - "epoch": 0.36, - "learning_rate": 0.00042835692295267237, - "loss": 3.5423, - "step": 21080 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004282825631302185, - "loss": 3.4508, - "step": 21085 - }, - { - "epoch": 0.36, - "learning_rate": 0.0004282081936615734, - "loss": 3.4325, - "step": 21090 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004281338145523291, - "loss": 3.4188, - "step": 21095 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042805942580807863, - "loss": 3.3961, - "step": 21100 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042798502743441577, - "loss": 3.5294, - "step": 21105 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004279106194369346, - "loss": 3.5052, - "step": 21110 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004278362018212304, - "loss": 3.4474, - "step": 21115 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004277617745928991, - "loss": 3.4268, - "step": 21120 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042768733775753705, - "loss": 3.4271, - "step": 21125 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004276128913207415, - "loss": 3.319, - "step": 21130 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042753843528811055, - "loss": 3.4287, - "step": 21135 - }, - { - "epoch": 0.37, - "learning_rate": 0.000427463969665243, - "loss": 3.502, - "step": 21140 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004273894944577381, - "loss": 3.3916, - "step": 21145 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042731500967119613, - "loss": 3.4751, - "step": 21150 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004272405153112179, - "loss": 3.3869, - "step": 21155 - }, - { - "epoch": 0.37, - "learning_rate": 0.000427166011383405, - "loss": 3.3328, - "step": 21160 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004270914978933598, - "loss": 3.5021, - "step": 21165 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042701697484668515, - "loss": 3.5497, - "step": 21170 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004269424422489851, - "loss": 3.4791, - "step": 21175 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042686790010586386, - "loss": 3.4404, - "step": 21180 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004267933484229267, - "loss": 3.3707, - "step": 21185 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004267187872057797, - "loss": 3.4202, - "step": 21190 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042664421646002916, - "loss": 3.4687, - "step": 21195 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042656963619128267, - "loss": 3.4317, - "step": 21200 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042649504640514816, - "loss": 3.3562, - "step": 21205 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004264204471072345, - "loss": 3.5205, - "step": 21210 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042634583830315103, - "loss": 3.3985, - "step": 21215 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042627121999850806, - "loss": 3.4669, - "step": 21220 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004261965921989165, - "loss": 3.3633, - "step": 21225 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042612195490998795, - "loss": 3.4944, - "step": 21230 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004260473081373347, - "loss": 3.4313, - "step": 21235 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042597265188656995, - "loss": 3.3019, - "step": 21240 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004258979861633073, - "loss": 3.5405, - "step": 21245 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004258233109731613, - "loss": 3.4114, - "step": 21250 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004257486263217473, - "loss": 3.5809, - "step": 21255 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042567393221468094, - "loss": 3.4726, - "step": 21260 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042559922865757896, - "loss": 3.4404, - "step": 21265 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004255245156560586, - "loss": 3.4897, - "step": 21270 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042544979321573794, - "loss": 3.4024, - "step": 21275 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042537506134223595, - "loss": 3.4146, - "step": 21280 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004253003200411717, - "loss": 3.2768, - "step": 21285 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004252255693181657, - "loss": 3.3862, - "step": 21290 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004251508091788385, - "loss": 3.4472, - "step": 21295 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042507603962881193, - "loss": 3.3695, - "step": 21300 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042500126067370814, - "loss": 3.5154, - "step": 21305 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042492647231915016, - "loss": 3.466, - "step": 21310 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004248516745707618, - "loss": 3.5312, - "step": 21315 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004247768674341672, - "loss": 3.4309, - "step": 21320 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004247020509149918, - "loss": 3.3506, - "step": 21325 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042462722501886114, - "loss": 3.3954, - "step": 21330 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042455238975140186, - "loss": 3.4317, - "step": 21335 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004244775451182413, - "loss": 3.4081, - "step": 21340 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004244026911250072, - "loss": 3.4154, - "step": 21345 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004243278277773283, - "loss": 3.378, - "step": 21350 - }, - { - "epoch": 0.37, - "learning_rate": 0.000424252955080834, - "loss": 3.4763, - "step": 21355 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004241780730411541, - "loss": 3.3921, - "step": 21360 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004241031816639197, - "loss": 3.4287, - "step": 21365 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004240282809547619, - "loss": 3.4556, - "step": 21370 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042395337091931314, - "loss": 3.5046, - "step": 21375 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004238784515632061, - "loss": 3.3785, - "step": 21380 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004238035228920743, - "loss": 3.4726, - "step": 21385 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042372858491155203, - "loss": 3.5175, - "step": 21390 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004236536376272744, - "loss": 3.4565, - "step": 21395 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042357868104487683, - "loss": 3.4254, - "step": 21400 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042350371516999586, - "loss": 3.4177, - "step": 21405 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004234287400082684, - "loss": 3.4484, - "step": 21410 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004233537555653322, - "loss": 3.4054, - "step": 21415 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042327876184682594, - "loss": 3.3857, - "step": 21420 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004232037588583885, - "loss": 3.451, - "step": 21425 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004231287466056597, - "loss": 3.4559, - "step": 21430 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004230537250942803, - "loss": 3.4196, - "step": 21435 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004229786943298914, - "loss": 3.454, - "step": 21440 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004229036543181349, - "loss": 3.4613, - "step": 21445 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004228286050646535, - "loss": 3.4041, - "step": 21450 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042275354657509036, - "loss": 3.4881, - "step": 21455 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042267847885508976, - "loss": 3.5192, - "step": 21460 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042260340191029624, - "loss": 3.4635, - "step": 21465 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004225283157463553, - "loss": 3.4071, - "step": 21470 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004224532203689128, - "loss": 3.4137, - "step": 21475 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004223781157836158, - "loss": 3.3856, - "step": 21480 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004223030019961116, - "loss": 3.4598, - "step": 21485 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042222787901204854, - "loss": 3.4487, - "step": 21490 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004221527468370754, - "loss": 3.4663, - "step": 21495 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042207760547684155, - "loss": 3.4861, - "step": 21500 - }, - { - "epoch": 0.37, - "eval_loss": 3.4763381481170654, - "eval_runtime": 149.6778, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 21500 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004220024549369975, - "loss": 3.5112, - "step": 21505 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004219272952231942, - "loss": 3.4445, - "step": 21510 - }, - { - "epoch": 0.37, - "learning_rate": 0.000421852126341083, - "loss": 3.4347, - "step": 21515 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042177694829631653, - "loss": 3.4521, - "step": 21520 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042170176109454754, - "loss": 3.3605, - "step": 21525 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004216265647414298, - "loss": 3.3925, - "step": 21530 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042155135924261785, - "loss": 3.4833, - "step": 21535 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042147614460376646, - "loss": 3.435, - "step": 21540 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004214009208305317, - "loss": 3.4267, - "step": 21545 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004213256879285698, - "loss": 3.4272, - "step": 21550 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042125044590353796, - "loss": 3.3872, - "step": 21555 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042117519476109395, - "loss": 3.406, - "step": 21560 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004210999345068963, - "loss": 3.3582, - "step": 21565 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004210246651466043, - "loss": 3.4148, - "step": 21570 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042094938668587765, - "loss": 3.4063, - "step": 21575 - }, - { - "epoch": 0.37, - "learning_rate": 0.000420874099130377, - "loss": 3.472, - "step": 21580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004207988024857636, - "loss": 3.5276, - "step": 21585 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042072349675769923, - "loss": 3.5215, - "step": 21590 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042064818195184663, - "loss": 3.5187, - "step": 21595 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004205728580738691, - "loss": 3.4223, - "step": 21600 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004204975251294306, - "loss": 3.3929, - "step": 21605 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042042218312419576, - "loss": 3.5108, - "step": 21610 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042034683206382983, - "loss": 3.571, - "step": 21615 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004202714719539989, - "loss": 3.5817, - "step": 21620 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004201961028003697, - "loss": 3.4792, - "step": 21625 - }, - { - "epoch": 0.37, - "learning_rate": 0.00042012072460860956, - "loss": 3.4894, - "step": 21630 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004200453373843865, - "loss": 3.4205, - "step": 21635 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004199699411333693, - "loss": 3.5227, - "step": 21640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00041989453586122744, - "loss": 3.3261, - "step": 21645 - }, - { - "epoch": 0.37, - "learning_rate": 0.00041981912157363087, - "loss": 3.4374, - "step": 21650 - }, - { - "epoch": 0.37, - "learning_rate": 0.0004197436982762504, - "loss": 3.4541, - "step": 21655 - }, - { - "epoch": 0.37, - "learning_rate": 0.00041966826597475757, - "loss": 3.4462, - "step": 21660 - }, - { - "epoch": 0.37, - "learning_rate": 0.00041959282467482435, - "loss": 3.4077, - "step": 21665 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041951737438212367, - "loss": 3.3667, - "step": 21670 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004194419151023289, - "loss": 3.4438, - "step": 21675 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004193664468411143, - "loss": 3.4317, - "step": 21680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041929096960415457, - "loss": 3.4413, - "step": 21685 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004192154833971254, - "loss": 3.507, - "step": 21690 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041913998822570267, - "loss": 3.5172, - "step": 21695 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004190644840955636, - "loss": 3.4648, - "step": 21700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004189889710123854, - "loss": 3.4228, - "step": 21705 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041891344898184647, - "loss": 3.383, - "step": 21710 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041883791800962554, - "loss": 3.4611, - "step": 21715 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004187623781014021, - "loss": 3.4332, - "step": 21720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041868682926285677, - "loss": 3.3851, - "step": 21725 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004186112714996699, - "loss": 3.4926, - "step": 21730 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041853570481752335, - "loss": 3.3628, - "step": 21735 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004184601292220993, - "loss": 3.4518, - "step": 21740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041838454471908066, - "loss": 3.2995, - "step": 21745 - }, - { - "epoch": 0.38, - "learning_rate": 0.000418308951314151, - "loss": 3.4446, - "step": 21750 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041823334901299435, - "loss": 3.5188, - "step": 21755 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041815773782129587, - "loss": 3.4001, - "step": 21760 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004180821177447411, - "loss": 3.4094, - "step": 21765 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041800648878901627, - "loss": 3.3152, - "step": 21770 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041793085095980816, - "loss": 3.3421, - "step": 21775 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004178552042628045, - "loss": 3.4248, - "step": 21780 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041777954870369344, - "loss": 3.401, - "step": 21785 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041770388428816397, - "loss": 3.5127, - "step": 21790 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041762821102190554, - "loss": 3.4763, - "step": 21795 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004175525289106085, - "loss": 3.475, - "step": 21800 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041747683795996385, - "loss": 3.3599, - "step": 21805 - }, - { - "epoch": 0.38, - "learning_rate": 0.000417401138175663, - "loss": 3.4129, - "step": 21810 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041732542956339816, - "loss": 3.4717, - "step": 21815 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004172497121288623, - "loss": 3.4032, - "step": 21820 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041717398587774913, - "loss": 3.442, - "step": 21825 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041709825081575263, - "loss": 3.4066, - "step": 21830 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004170225069485677, - "loss": 3.4771, - "step": 21835 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041694675428189006, - "loss": 3.4156, - "step": 21840 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004168709928214159, - "loss": 3.3453, - "step": 21845 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041679522257284196, - "loss": 3.5143, - "step": 21850 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041671944354186585, - "loss": 3.5789, - "step": 21855 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041664365573418577, - "loss": 3.406, - "step": 21860 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041656785915550055, - "loss": 3.4958, - "step": 21865 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004164920538115098, - "loss": 3.4977, - "step": 21870 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004164162397079135, - "loss": 3.4383, - "step": 21875 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041634041685041266, - "loss": 3.5104, - "step": 21880 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004162645852447087, - "loss": 3.4549, - "step": 21885 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004161887448965039, - "loss": 3.4399, - "step": 21890 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004161128958115008, - "loss": 3.3542, - "step": 21895 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004160370379954029, - "loss": 3.4418, - "step": 21900 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041596117145391466, - "loss": 3.3958, - "step": 21905 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004158852961927405, - "loss": 3.3919, - "step": 21910 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004158094122175859, - "loss": 3.3159, - "step": 21915 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004157335195341571, - "loss": 3.5038, - "step": 21920 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004156576181481608, - "loss": 3.3401, - "step": 21925 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041558170806530435, - "loss": 3.4432, - "step": 21930 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004155057892912957, - "loss": 3.3199, - "step": 21935 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041542986183184367, - "loss": 3.5088, - "step": 21940 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041535392569265765, - "loss": 3.5437, - "step": 21945 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004152779808794476, - "loss": 3.3675, - "step": 21950 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004152020273979242, - "loss": 3.4963, - "step": 21955 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041512606525379866, - "loss": 3.422, - "step": 21960 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004150500944527831, - "loss": 3.3885, - "step": 21965 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041497411500059, - "loss": 3.4297, - "step": 21970 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041489812690293274, - "loss": 3.447, - "step": 21975 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041482213016552513, - "loss": 3.4361, - "step": 21980 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004147461247940818, - "loss": 3.423, - "step": 21985 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004146701107943179, - "loss": 3.4564, - "step": 21990 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041459408817194946, - "loss": 3.3803, - "step": 21995 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041451805693269283, - "loss": 3.4608, - "step": 22000 - }, - { - "epoch": 0.38, - "eval_loss": 3.4630024433135986, - "eval_runtime": 149.7666, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 22000 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004144420170822652, - "loss": 3.4355, - "step": 22005 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004143659686263844, - "loss": 3.4343, - "step": 22010 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004142899115707688, - "loss": 3.3277, - "step": 22015 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004142138459211377, - "loss": 3.4995, - "step": 22020 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004141377716832107, - "loss": 3.4184, - "step": 22025 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041406168886270816, - "loss": 3.3363, - "step": 22030 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041398559746535113, - "loss": 3.3641, - "step": 22035 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004139094974968614, - "loss": 3.4262, - "step": 22040 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004138333889629613, - "loss": 3.4048, - "step": 22045 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004137572718693737, - "loss": 3.4102, - "step": 22050 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041368114622182214, - "loss": 3.4041, - "step": 22055 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004136050120260311, - "loss": 3.4719, - "step": 22060 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004135288692877254, - "loss": 3.4398, - "step": 22065 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004134527180126305, - "loss": 3.3921, - "step": 22070 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004133765582064726, - "loss": 3.4075, - "step": 22075 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041330038987497856, - "loss": 3.4711, - "step": 22080 - }, - { - "epoch": 0.38, - "learning_rate": 0.000413224213023876, - "loss": 3.4251, - "step": 22085 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041314802765889276, - "loss": 3.3675, - "step": 22090 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004130718337857578, - "loss": 3.362, - "step": 22095 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004129956314102003, - "loss": 3.4152, - "step": 22100 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004129194205379506, - "loss": 3.4164, - "step": 22105 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041284320117473906, - "loss": 3.476, - "step": 22110 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041276697332629707, - "loss": 3.3858, - "step": 22115 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004126907369983566, - "loss": 3.3862, - "step": 22120 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004126144921966503, - "loss": 3.3574, - "step": 22125 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041253823892691135, - "loss": 3.358, - "step": 22130 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004124619771948735, - "loss": 3.4601, - "step": 22135 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041238570700627143, - "loss": 3.566, - "step": 22140 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041230942836684, - "loss": 3.4623, - "step": 22145 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041223314128231535, - "loss": 3.4965, - "step": 22150 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041215684575843345, - "loss": 3.3462, - "step": 22155 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041208054180093175, - "loss": 3.3736, - "step": 22160 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041200422941554753, - "loss": 3.503, - "step": 22165 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004119279086080193, - "loss": 3.5064, - "step": 22170 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041185157938408603, - "loss": 3.4897, - "step": 22175 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041177524174948715, - "loss": 3.4477, - "step": 22180 - }, - { - "epoch": 0.38, - "learning_rate": 0.000411698895709963, - "loss": 3.3709, - "step": 22185 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004116225412712544, - "loss": 3.4257, - "step": 22190 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004115461784391026, - "loss": 3.3655, - "step": 22195 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004114698072192499, - "loss": 3.4449, - "step": 22200 - }, - { - "epoch": 0.38, - "learning_rate": 0.000411393427617439, - "loss": 3.4398, - "step": 22205 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004113170396394133, - "loss": 3.1629, - "step": 22210 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004112406432909165, - "loss": 3.3966, - "step": 22215 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004111642385776935, - "loss": 3.3941, - "step": 22220 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004110878255054896, - "loss": 3.3217, - "step": 22225 - }, - { - "epoch": 0.38, - "learning_rate": 0.0004110114040800504, - "loss": 3.3977, - "step": 22230 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041093497430712267, - "loss": 3.4394, - "step": 22235 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041085853619245323, - "loss": 3.4824, - "step": 22240 - }, - { - "epoch": 0.38, - "learning_rate": 0.00041078208974179017, - "loss": 3.4449, - "step": 22245 - }, - { - "epoch": 0.39, - "learning_rate": 0.00041070563496088165, - "loss": 3.4106, - "step": 22250 - }, - { - "epoch": 0.39, - "learning_rate": 0.00041062917185547666, - "loss": 3.4365, - "step": 22255 - }, - { - "epoch": 0.39, - "learning_rate": 0.00041055270043132493, - "loss": 3.4706, - "step": 22260 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004104762206941768, - "loss": 3.3919, - "step": 22265 - }, - { - "epoch": 0.39, - "learning_rate": 0.000410399732649783, - "loss": 3.4725, - "step": 22270 - }, - { - "epoch": 0.39, - "learning_rate": 0.00041032323630389516, - "loss": 3.5326, - "step": 22275 - }, - { - "epoch": 0.39, - "learning_rate": 0.00041024673166226524, - "loss": 3.2729, - "step": 22280 - }, - { - "epoch": 0.39, - "learning_rate": 0.00041017021873064613, - "loss": 3.4428, - "step": 22285 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004100936975147913, - "loss": 3.4666, - "step": 22290 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004100171680204545, - "loss": 3.4196, - "step": 22295 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004099406302533905, - "loss": 3.4659, - "step": 22300 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040986408421935454, - "loss": 3.4275, - "step": 22305 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040978752992410264, - "loss": 3.4052, - "step": 22310 - }, - { - "epoch": 0.39, - "learning_rate": 0.000409710967373391, - "loss": 3.5221, - "step": 22315 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004096343965729769, - "loss": 3.5078, - "step": 22320 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040955781752861805, - "loss": 3.4321, - "step": 22325 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040948123024607286, - "loss": 3.4621, - "step": 22330 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040940463473110013, - "loss": 3.3809, - "step": 22335 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004093280309894596, - "loss": 3.4517, - "step": 22340 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040925141902691144, - "loss": 3.4528, - "step": 22345 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040917479884921654, - "loss": 3.4547, - "step": 22350 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004090981704621362, - "loss": 3.4308, - "step": 22355 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004090215338714325, - "loss": 3.2692, - "step": 22360 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004089448890828683, - "loss": 3.4193, - "step": 22365 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040886823610220674, - "loss": 3.4402, - "step": 22370 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004087915749352117, - "loss": 3.4521, - "step": 22375 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040871490558764786, - "loss": 3.3807, - "step": 22380 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004086382280652802, - "loss": 3.4643, - "step": 22385 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004085615423738747, - "loss": 3.4981, - "step": 22390 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004084848485191974, - "loss": 3.5087, - "step": 22395 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040840814650701553, - "loss": 3.453, - "step": 22400 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004083314363430967, - "loss": 3.3977, - "step": 22405 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040825471803320894, - "loss": 3.357, - "step": 22410 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004081779915831213, - "loss": 3.4247, - "step": 22415 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040810125699860297, - "loss": 3.3161, - "step": 22420 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040802451428542414, - "loss": 3.4269, - "step": 22425 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004079477634493556, - "loss": 3.4299, - "step": 22430 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040787100449616833, - "loss": 3.4994, - "step": 22435 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004077942374316344, - "loss": 3.3577, - "step": 22440 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004077174622615262, - "loss": 3.353, - "step": 22445 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004076406789916169, - "loss": 3.5383, - "step": 22450 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040756388762768023, - "loss": 3.4003, - "step": 22455 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004074870881754904, - "loss": 3.4799, - "step": 22460 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004074102806408225, - "loss": 3.3984, - "step": 22465 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004073334650294519, - "loss": 3.4481, - "step": 22470 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040725664134715485, - "loss": 3.4943, - "step": 22475 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040717980959970794, - "loss": 3.4733, - "step": 22480 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004071029697928887, - "loss": 3.4076, - "step": 22485 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004070261219324751, - "loss": 3.4849, - "step": 22490 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040694926602424553, - "loss": 3.4302, - "step": 22495 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004068724020739793, - "loss": 3.4945, - "step": 22500 - }, - { - "epoch": 0.39, - "eval_loss": 3.4543726444244385, - "eval_runtime": 149.6841, - "eval_samples_per_second": 12.299, - "eval_steps_per_second": 0.775, - "step": 22500 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040679553008745613, - "loss": 3.5376, - "step": 22505 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040671865007045654, - "loss": 3.2163, - "step": 22510 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040664176202876123, - "loss": 3.4521, - "step": 22515 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040656486596815194, - "loss": 3.3826, - "step": 22520 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040648796189441094, - "loss": 3.3821, - "step": 22525 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040641104981332097, - "loss": 3.4799, - "step": 22530 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040633412973066534, - "loss": 3.3088, - "step": 22535 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004062572016522281, - "loss": 3.4578, - "step": 22540 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040618026558379396, - "loss": 3.4492, - "step": 22545 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040610332153114795, - "loss": 3.4271, - "step": 22550 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004060263695000759, - "loss": 3.3281, - "step": 22555 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040594940949636424, - "loss": 3.5284, - "step": 22560 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040587244152579996, - "loss": 3.4063, - "step": 22565 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040579546559417083, - "loss": 3.3343, - "step": 22570 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040571848170726463, - "loss": 3.4706, - "step": 22575 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004056414898708706, - "loss": 3.4401, - "step": 22580 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040556449009077777, - "loss": 3.4597, - "step": 22585 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040548748237277644, - "loss": 3.4925, - "step": 22590 - }, - { - "epoch": 0.39, - "learning_rate": 0.000405410466722657, - "loss": 3.2829, - "step": 22595 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004053334431462106, - "loss": 3.4492, - "step": 22600 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004052564116492292, - "loss": 3.315, - "step": 22605 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004051793722375051, - "loss": 3.4348, - "step": 22610 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004051023249168312, - "loss": 3.4744, - "step": 22615 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004050252696930011, - "loss": 3.4817, - "step": 22620 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004049482065718089, - "loss": 3.4592, - "step": 22625 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004048711355590495, - "loss": 3.3313, - "step": 22630 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040479405666051817, - "loss": 3.4485, - "step": 22635 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040471696988201076, - "loss": 3.3691, - "step": 22640 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040463987522932395, - "loss": 3.4079, - "step": 22645 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004045627727082548, - "loss": 3.4411, - "step": 22650 - }, - { - "epoch": 0.39, - "learning_rate": 0.000404485662324601, - "loss": 3.4073, - "step": 22655 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004044085440841609, - "loss": 3.3304, - "step": 22660 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004043314179927332, - "loss": 3.3583, - "step": 22665 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004042542840561178, - "loss": 3.4128, - "step": 22670 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004041771422801144, - "loss": 3.4874, - "step": 22675 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040409999267052385, - "loss": 3.4342, - "step": 22680 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004040228352331474, - "loss": 3.4489, - "step": 22685 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040394566997378674, - "loss": 3.4437, - "step": 22690 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040386849689824444, - "loss": 3.3225, - "step": 22695 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004037913160123235, - "loss": 3.4923, - "step": 22700 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004037141273218275, - "loss": 3.5339, - "step": 22705 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004036369308325607, - "loss": 3.4442, - "step": 22710 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004035597265503278, - "loss": 3.2786, - "step": 22715 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004034825144809342, - "loss": 3.3097, - "step": 22720 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004034052946301858, - "loss": 3.3156, - "step": 22725 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004033280670038893, - "loss": 3.4123, - "step": 22730 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004032508316078517, - "loss": 3.3769, - "step": 22735 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040317358844788064, - "loss": 3.4664, - "step": 22740 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004030963375297845, - "loss": 3.4246, - "step": 22745 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040301907885937217, - "loss": 3.4092, - "step": 22750 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004029418124424531, - "loss": 3.4981, - "step": 22755 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040286453828483723, - "loss": 3.3302, - "step": 22760 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040278725639233525, - "loss": 3.3779, - "step": 22765 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004027099667707584, - "loss": 3.4032, - "step": 22770 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004026326694259185, - "loss": 3.4799, - "step": 22775 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040255536436362767, - "loss": 3.4285, - "step": 22780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040247805158969915, - "loss": 3.4642, - "step": 22785 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040240073110994637, - "loss": 3.4589, - "step": 22790 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004023234029301833, - "loss": 3.4435, - "step": 22795 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004022460670562248, - "loss": 3.3843, - "step": 22800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004021687234938861, - "loss": 3.5109, - "step": 22805 - }, - { - "epoch": 0.39, - "learning_rate": 0.000402091372248983, - "loss": 3.3412, - "step": 22810 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040201401332733177, - "loss": 3.2672, - "step": 22815 - }, - { - "epoch": 0.39, - "learning_rate": 0.00040193664673474967, - "loss": 3.3403, - "step": 22820 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004018592724770541, - "loss": 3.4237, - "step": 22825 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040178189056006334, - "loss": 3.2901, - "step": 22830 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004017045009895959, - "loss": 3.5062, - "step": 22835 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040162710377147134, - "loss": 3.45, - "step": 22840 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004015496989115093, - "loss": 3.435, - "step": 22845 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004014722864155304, - "loss": 3.4186, - "step": 22850 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004013948662893557, - "loss": 3.3804, - "step": 22855 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004013174385388065, - "loss": 3.4171, - "step": 22860 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004012400031697053, - "loss": 3.3592, - "step": 22865 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040116256018787465, - "loss": 3.4479, - "step": 22870 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040108510959913795, - "loss": 3.283, - "step": 22875 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040100765140931905, - "loss": 3.5205, - "step": 22880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040093018562424235, - "loss": 3.3975, - "step": 22885 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040085271224973303, - "loss": 3.4526, - "step": 22890 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004007752312916167, - "loss": 3.4228, - "step": 22895 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040069774275571933, - "loss": 3.4961, - "step": 22900 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040062024664786773, - "loss": 3.2809, - "step": 22905 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004005427429738893, - "loss": 3.3984, - "step": 22910 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040046523173961195, - "loss": 3.3236, - "step": 22915 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040038771295086397, - "loss": 3.4506, - "step": 22920 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004003101866134744, - "loss": 3.4803, - "step": 22925 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040023265273327296, - "loss": 3.4333, - "step": 22930 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004001551113160897, - "loss": 3.4086, - "step": 22935 - }, - { - "epoch": 0.4, - "learning_rate": 0.00040007756236775543, - "loss": 3.329, - "step": 22940 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004000000058941012, - "loss": 3.4441, - "step": 22945 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003999224419009591, - "loss": 3.4387, - "step": 22950 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039984487039416153, - "loss": 3.3819, - "step": 22955 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003997672913795413, - "loss": 3.3857, - "step": 22960 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039968970486293213, - "loss": 3.4039, - "step": 22965 - }, - { - "epoch": 0.4, - "learning_rate": 0.000399612110850168, - "loss": 3.4457, - "step": 22970 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039953450934708366, - "loss": 3.4657, - "step": 22975 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039945690035951434, - "loss": 3.3894, - "step": 22980 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039937928389329575, - "loss": 3.5035, - "step": 22985 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003993016599542644, - "loss": 3.3818, - "step": 22990 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003992240285482571, - "loss": 3.3472, - "step": 22995 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003991463896811114, - "loss": 3.3529, - "step": 23000 - }, - { - "epoch": 0.4, - "eval_loss": 3.442578077316284, - "eval_runtime": 149.8744, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 23000 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039906874335866535, - "loss": 3.3731, - "step": 23005 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003989910895867574, - "loss": 3.4402, - "step": 23010 - }, - { - "epoch": 0.4, - "learning_rate": 0.000398913428371227, - "loss": 3.4643, - "step": 23015 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003988357597179135, - "loss": 3.501, - "step": 23020 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003987580836326575, - "loss": 3.4019, - "step": 23025 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003986804001212998, - "loss": 3.351, - "step": 23030 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003986027091896817, - "loss": 3.313, - "step": 23035 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003985250108436452, - "loss": 3.4688, - "step": 23040 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003984473050890327, - "loss": 3.3642, - "step": 23045 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003983695919316875, - "loss": 3.3882, - "step": 23050 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039829187137745316, - "loss": 3.3845, - "step": 23055 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003982141434321738, - "loss": 3.3129, - "step": 23060 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039813640810169415, - "loss": 3.2984, - "step": 23065 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003980586653918596, - "loss": 3.4537, - "step": 23070 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039798091530851586, - "loss": 3.4673, - "step": 23075 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003979031578575096, - "loss": 3.4474, - "step": 23080 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003978253930446875, - "loss": 3.4899, - "step": 23085 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039774762087589713, - "loss": 3.4094, - "step": 23090 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003976698413569868, - "loss": 3.4574, - "step": 23095 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039759205449380476, - "loss": 3.4646, - "step": 23100 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003975142602922005, - "loss": 3.4299, - "step": 23105 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003974364587580236, - "loss": 3.4025, - "step": 23110 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003973586498971243, - "loss": 3.4642, - "step": 23115 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039728083371535355, - "loss": 3.3419, - "step": 23120 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003972030102185626, - "loss": 3.4246, - "step": 23125 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039712517941260344, - "loss": 3.4522, - "step": 23130 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003970473413033286, - "loss": 3.331, - "step": 23135 - }, - { - "epoch": 0.4, - "learning_rate": 0.000396969495896591, - "loss": 3.3998, - "step": 23140 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039689164319824436, - "loss": 3.3278, - "step": 23145 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003968137832141426, - "loss": 3.4126, - "step": 23150 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003967359159501406, - "loss": 3.4785, - "step": 23155 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039665804141209346, - "loss": 3.3449, - "step": 23160 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003965801596058569, - "loss": 3.2645, - "step": 23165 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039650227053728736, - "loss": 3.4129, - "step": 23170 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003964243742122417, - "loss": 3.4324, - "step": 23175 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039634647063657724, - "loss": 3.3419, - "step": 23180 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039626855981615184, - "loss": 3.4273, - "step": 23185 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039619064175682416, - "loss": 3.3218, - "step": 23190 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003961127164644533, - "loss": 3.5256, - "step": 23195 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003960347839448986, - "loss": 3.4073, - "step": 23200 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003959568442040203, - "loss": 3.4519, - "step": 23205 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003958788972476792, - "loss": 3.4716, - "step": 23210 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003958009430817364, - "loss": 3.3856, - "step": 23215 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003957229817120536, - "loss": 3.3233, - "step": 23220 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039564501314449317, - "loss": 3.4366, - "step": 23225 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039556703738491785, - "loss": 3.3467, - "step": 23230 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039548905443919117, - "loss": 3.324, - "step": 23235 - }, - { - "epoch": 0.4, - "learning_rate": 0.000395411064313177, - "loss": 3.3125, - "step": 23240 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003953330670127397, - "loss": 3.3889, - "step": 23245 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003952550625437443, - "loss": 3.4227, - "step": 23250 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003951770509120565, - "loss": 3.454, - "step": 23255 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039509903212354215, - "loss": 3.4451, - "step": 23260 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039502100618406797, - "loss": 3.3362, - "step": 23265 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003949429730995011, - "loss": 3.4359, - "step": 23270 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003948649328757093, - "loss": 3.3884, - "step": 23275 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039478688551856055, - "loss": 3.4077, - "step": 23280 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039470883103392393, - "loss": 3.3771, - "step": 23285 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039463076942766853, - "loss": 3.4804, - "step": 23290 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003945527007056644, - "loss": 3.3656, - "step": 23295 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003944746248737816, - "loss": 3.4062, - "step": 23300 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003943965419378911, - "loss": 3.3631, - "step": 23305 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039431845190386454, - "loss": 3.4399, - "step": 23310 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003942403547775738, - "loss": 3.4739, - "step": 23315 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003941622505648913, - "loss": 3.3574, - "step": 23320 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039408413927169016, - "loss": 3.463, - "step": 23325 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039400602090384384, - "loss": 3.3802, - "step": 23330 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003939278954672267, - "loss": 3.4331, - "step": 23335 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039384976296771303, - "loss": 3.5204, - "step": 23340 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039377162341117816, - "loss": 3.4733, - "step": 23345 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039369347680349786, - "loss": 3.3425, - "step": 23350 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039361532315054825, - "loss": 3.4375, - "step": 23355 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039353716245820613, - "loss": 3.4392, - "step": 23360 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003934589947323487, - "loss": 3.3395, - "step": 23365 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039338081997885384, - "loss": 3.3579, - "step": 23370 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003933026382036, - "loss": 3.4948, - "step": 23375 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003932244494124658, - "loss": 3.4524, - "step": 23380 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039314625361133086, - "loss": 3.4354, - "step": 23385 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039306805080607495, - "loss": 3.3287, - "step": 23390 - }, - { - "epoch": 0.4, - "learning_rate": 0.0003929898410025786, - "loss": 3.3441, - "step": 23395 - }, - { - "epoch": 0.4, - "learning_rate": 0.00039291162420672284, - "loss": 3.3456, - "step": 23400 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039283340042438905, - "loss": 3.3949, - "step": 23405 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003927551696614593, - "loss": 3.3505, - "step": 23410 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003926769319238162, - "loss": 3.4162, - "step": 23415 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039259868721734274, - "loss": 3.3707, - "step": 23420 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003925204355479226, - "loss": 3.3314, - "step": 23425 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039244217692143975, - "loss": 3.4314, - "step": 23430 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003923639113437791, - "loss": 3.4921, - "step": 23435 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039228563882082564, - "loss": 3.3425, - "step": 23440 - }, - { - "epoch": 0.41, - "learning_rate": 0.000392207359358465, - "loss": 3.3939, - "step": 23445 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039212907296258363, - "loss": 3.391, - "step": 23450 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039205077963906806, - "loss": 3.3738, - "step": 23455 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003919724793938056, - "loss": 3.5164, - "step": 23460 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039189417223268406, - "loss": 3.3694, - "step": 23465 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039181585816159176, - "loss": 3.354, - "step": 23470 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003917375371864174, - "loss": 3.4643, - "step": 23475 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003916592093130505, - "loss": 3.3646, - "step": 23480 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039158087454738063, - "loss": 3.38, - "step": 23485 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003915025328952984, - "loss": 3.5276, - "step": 23490 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039142418436269473, - "loss": 3.5326, - "step": 23495 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039134582895546094, - "loss": 3.4432, - "step": 23500 - }, - { - "epoch": 0.41, - "eval_loss": 3.4460275173187256, - "eval_runtime": 149.7826, - "eval_samples_per_second": 12.291, - "eval_steps_per_second": 0.774, - "step": 23500 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003912674666794889, - "loss": 3.5093, - "step": 23505 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039118909754067097, - "loss": 3.4232, - "step": 23510 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003911107215449005, - "loss": 3.3742, - "step": 23515 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039103233869807055, - "loss": 3.5677, - "step": 23520 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039095394900607537, - "loss": 3.412, - "step": 23525 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003908755524748092, - "loss": 3.4859, - "step": 23530 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039079714911016727, - "loss": 3.3516, - "step": 23535 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003907187389180451, - "loss": 3.4358, - "step": 23540 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003906403219043386, - "loss": 3.4573, - "step": 23545 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003905618980749445, - "loss": 3.4623, - "step": 23550 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039048346743575976, - "loss": 3.5096, - "step": 23555 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039040502999268203, - "loss": 3.3192, - "step": 23560 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003903265857516094, - "loss": 3.4335, - "step": 23565 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039024813471844034, - "loss": 3.4312, - "step": 23570 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039016967689907406, - "loss": 3.3351, - "step": 23575 - }, - { - "epoch": 0.41, - "learning_rate": 0.00039009121229941027, - "loss": 3.4376, - "step": 23580 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003900127409253489, - "loss": 3.378, - "step": 23585 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003899342627827909, - "loss": 3.4918, - "step": 23590 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038985577787763713, - "loss": 3.5062, - "step": 23595 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003897772862157893, - "loss": 3.3182, - "step": 23600 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038969878780314976, - "loss": 3.4342, - "step": 23605 - }, - { - "epoch": 0.41, - "learning_rate": 0.000389620282645621, - "loss": 3.3611, - "step": 23610 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003895417707491064, - "loss": 3.2832, - "step": 23615 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038946325211950945, - "loss": 3.4133, - "step": 23620 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003893847267627344, - "loss": 3.3847, - "step": 23625 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003893061946846861, - "loss": 3.2987, - "step": 23630 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038922765589126954, - "loss": 3.3355, - "step": 23635 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038914911038839063, - "loss": 3.2932, - "step": 23640 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003890705581819554, - "loss": 3.4283, - "step": 23645 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038899199927787067, - "loss": 3.3789, - "step": 23650 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038891343368204374, - "loss": 3.4124, - "step": 23655 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003888348614003823, - "loss": 3.388, - "step": 23660 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003887562824387945, - "loss": 3.4347, - "step": 23665 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038867769680318913, - "loss": 3.4287, - "step": 23670 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003885991044994755, - "loss": 3.4319, - "step": 23675 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003885205055335632, - "loss": 3.3868, - "step": 23680 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003884418999113625, - "loss": 3.4081, - "step": 23685 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038836328763878423, - "loss": 3.5162, - "step": 23690 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003882846687217396, - "loss": 3.439, - "step": 23695 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038820604316614036, - "loss": 3.4161, - "step": 23700 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003881274109778987, - "loss": 3.408, - "step": 23705 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003880487721629273, - "loss": 3.3882, - "step": 23710 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003879701267271396, - "loss": 3.2077, - "step": 23715 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038789147467644915, - "loss": 3.346, - "step": 23720 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038781281601677023, - "loss": 3.4345, - "step": 23725 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003877341507540176, - "loss": 3.2781, - "step": 23730 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038765547889410643, - "loss": 3.3699, - "step": 23735 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003875768004429525, - "loss": 3.4467, - "step": 23740 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038749811540647204, - "loss": 3.3993, - "step": 23745 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038741942379058163, - "loss": 3.3744, - "step": 23750 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038734072560119866, - "loss": 3.4018, - "step": 23755 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038726202084424076, - "loss": 3.412, - "step": 23760 - }, - { - "epoch": 0.41, - "learning_rate": 0.000387183309525626, - "loss": 3.4137, - "step": 23765 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003871045916512732, - "loss": 3.4427, - "step": 23770 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038702586722710153, - "loss": 3.4318, - "step": 23775 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003869471362590306, - "loss": 3.4418, - "step": 23780 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038686839875298073, - "loss": 3.3053, - "step": 23785 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003867896547148724, - "loss": 3.4195, - "step": 23790 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038671090415062683, - "loss": 3.343, - "step": 23795 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038663214706616565, - "loss": 3.4823, - "step": 23800 - }, - { - "epoch": 0.41, - "learning_rate": 0.000386553383467411, - "loss": 3.2957, - "step": 23805 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003864746133602855, - "loss": 3.3719, - "step": 23810 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003863958367507122, - "loss": 3.3919, - "step": 23815 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038631705364461485, - "loss": 3.3362, - "step": 23820 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038623826404791733, - "loss": 3.4481, - "step": 23825 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003861594679665443, - "loss": 3.4172, - "step": 23830 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038608066540642095, - "loss": 3.3761, - "step": 23835 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003860018563734727, - "loss": 3.4139, - "step": 23840 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003859230408736256, - "loss": 3.4002, - "step": 23845 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038584421891280606, - "loss": 3.3431, - "step": 23850 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003857653904969413, - "loss": 3.4767, - "step": 23855 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038568655563195873, - "loss": 3.3485, - "step": 23860 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003856077143237863, - "loss": 3.4446, - "step": 23865 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003855288665783525, - "loss": 3.4077, - "step": 23870 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003854500124015861, - "loss": 3.4456, - "step": 23875 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038537115179941686, - "loss": 3.4435, - "step": 23880 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003852922847777745, - "loss": 3.3529, - "step": 23885 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038521341134258947, - "loss": 3.3197, - "step": 23890 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003851345314997926, - "loss": 3.3096, - "step": 23895 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003850556452553152, - "loss": 3.4025, - "step": 23900 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003849767526150892, - "loss": 3.3826, - "step": 23905 - }, - { - "epoch": 0.41, - "learning_rate": 0.000384897853585047, - "loss": 3.3757, - "step": 23910 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038481894817112114, - "loss": 3.4822, - "step": 23915 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038474003637924516, - "loss": 3.3612, - "step": 23920 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003846611182153528, - "loss": 3.4059, - "step": 23925 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038458219368537813, - "loss": 3.2958, - "step": 23930 - }, - { - "epoch": 0.41, - "learning_rate": 0.000384503262795256, - "loss": 3.3191, - "step": 23935 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038442432555092163, - "loss": 3.4621, - "step": 23940 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038434538195831054, - "loss": 3.3922, - "step": 23945 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038426643202335906, - "loss": 3.3505, - "step": 23950 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038418747575200374, - "loss": 3.4525, - "step": 23955 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003841085131501818, - "loss": 3.4467, - "step": 23960 - }, - { - "epoch": 0.41, - "learning_rate": 0.0003840295442238306, - "loss": 3.4365, - "step": 23965 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038395056897888824, - "loss": 3.471, - "step": 23970 - }, - { - "epoch": 0.41, - "learning_rate": 0.00038387158742129345, - "loss": 3.3748, - "step": 23975 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038379259955698516, - "loss": 3.4801, - "step": 23980 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003837136053919028, - "loss": 3.3963, - "step": 23985 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003836346049319862, - "loss": 3.4244, - "step": 23990 - }, - { - "epoch": 0.42, - "learning_rate": 0.000383555598183176, - "loss": 3.469, - "step": 23995 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003834765851514131, - "loss": 3.4334, - "step": 24000 - }, - { - "epoch": 0.42, - "eval_loss": 3.4270379543304443, - "eval_runtime": 150.6758, - "eval_samples_per_second": 12.218, - "eval_steps_per_second": 0.77, - "step": 24000 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038339756584263875, - "loss": 3.3244, - "step": 24005 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038331854026279485, - "loss": 3.3813, - "step": 24010 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038323950841782375, - "loss": 3.3714, - "step": 24015 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003831604703136683, - "loss": 3.3798, - "step": 24020 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003830814259562715, - "loss": 3.3194, - "step": 24025 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003830023753515773, - "loss": 3.4367, - "step": 24030 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038292331850552986, - "loss": 3.4029, - "step": 24035 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038284425542407395, - "loss": 3.4108, - "step": 24040 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003827651861131545, - "loss": 3.3577, - "step": 24045 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038268611057871727, - "loss": 3.4693, - "step": 24050 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003826070288267082, - "loss": 3.3681, - "step": 24055 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038252794086307405, - "loss": 3.4132, - "step": 24060 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003824488466937616, - "loss": 3.4845, - "step": 24065 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003823697463247184, - "loss": 3.4444, - "step": 24070 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038229063976189247, - "loss": 3.4076, - "step": 24075 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038221152701123216, - "loss": 3.3896, - "step": 24080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038213240807868633, - "loss": 3.3713, - "step": 24085 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003820532829702043, - "loss": 3.3014, - "step": 24090 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003819741516917359, - "loss": 3.4203, - "step": 24095 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038189501424923156, - "loss": 3.3145, - "step": 24100 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003818158706486417, - "loss": 3.36, - "step": 24105 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003817367208959177, - "loss": 3.3946, - "step": 24110 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003816575649970112, - "loss": 3.3836, - "step": 24115 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003815784029578743, - "loss": 3.4324, - "step": 24120 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003814992347844595, - "loss": 3.4453, - "step": 24125 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003814200604827201, - "loss": 3.4197, - "step": 24130 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038134088005860924, - "loss": 3.4852, - "step": 24135 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038126169351808127, - "loss": 3.408, - "step": 24140 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003811825008670902, - "loss": 3.4374, - "step": 24145 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003811033021115912, - "loss": 3.4174, - "step": 24150 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038102409725753947, - "loss": 3.4054, - "step": 24155 - }, - { - "epoch": 0.42, - "learning_rate": 0.000380944886310891, - "loss": 3.3431, - "step": 24160 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038086566927760185, - "loss": 3.5198, - "step": 24165 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038078644616362876, - "loss": 3.3664, - "step": 24170 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038070721697492895, - "loss": 3.3576, - "step": 24175 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003806279817174601, - "loss": 3.4815, - "step": 24180 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003805487403971802, - "loss": 3.4107, - "step": 24185 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038046949302004783, - "loss": 3.3978, - "step": 24190 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038039023959202193, - "loss": 3.3128, - "step": 24195 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003803109801190621, - "loss": 3.4749, - "step": 24200 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003802317146071281, - "loss": 3.3715, - "step": 24205 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038015244306218034, - "loss": 3.3911, - "step": 24210 - }, - { - "epoch": 0.42, - "learning_rate": 0.00038007316549017964, - "loss": 3.364, - "step": 24215 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037999388189708733, - "loss": 3.4206, - "step": 24220 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037991459228886495, - "loss": 3.4327, - "step": 24225 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003798352966714749, - "loss": 3.3105, - "step": 24230 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003797559950508796, - "loss": 3.4319, - "step": 24235 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037967668743304225, - "loss": 3.3862, - "step": 24240 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037959737382392637, - "loss": 3.3361, - "step": 24245 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003795180542294958, - "loss": 3.4328, - "step": 24250 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037943872865571525, - "loss": 3.4733, - "step": 24255 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037935939710854934, - "loss": 3.3578, - "step": 24260 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003792800595939634, - "loss": 3.3603, - "step": 24265 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037920071611792337, - "loss": 3.4714, - "step": 24270 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003791213666863953, - "loss": 3.4574, - "step": 24275 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037904201130534604, - "loss": 3.3614, - "step": 24280 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003789626499807426, - "loss": 3.3123, - "step": 24285 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003788832827185525, - "loss": 3.3467, - "step": 24290 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037880390952474386, - "loss": 3.4375, - "step": 24295 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037872453040528516, - "loss": 3.3211, - "step": 24300 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003786451453661451, - "loss": 3.3171, - "step": 24305 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037856575441329315, - "loss": 3.43, - "step": 24310 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003784863575526992, - "loss": 3.36, - "step": 24315 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037840695479033335, - "loss": 3.4438, - "step": 24320 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003783275461321663, - "loss": 3.323, - "step": 24325 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037824813158416937, - "loss": 3.4666, - "step": 24330 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037816871115231376, - "loss": 3.4379, - "step": 24335 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037808928484257175, - "loss": 3.501, - "step": 24340 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003780098526609158, - "loss": 3.4066, - "step": 24345 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037793041461331857, - "loss": 3.2325, - "step": 24350 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003778509707057536, - "loss": 3.3971, - "step": 24355 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037777152094419474, - "loss": 3.384, - "step": 24360 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003776920653346159, - "loss": 3.3162, - "step": 24365 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037761260388299205, - "loss": 3.4612, - "step": 24370 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037753313659529805, - "loss": 3.4324, - "step": 24375 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003774536634775097, - "loss": 3.4222, - "step": 24380 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003773741845356026, - "loss": 3.3924, - "step": 24385 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003772946997755534, - "loss": 3.4305, - "step": 24390 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003772152092033389, - "loss": 3.402, - "step": 24395 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003771357128249365, - "loss": 3.5204, - "step": 24400 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003770562106463237, - "loss": 3.3448, - "step": 24405 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003769767026734788, - "loss": 3.2582, - "step": 24410 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003768971889123803, - "loss": 3.4989, - "step": 24415 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003768176693690074, - "loss": 3.2957, - "step": 24420 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003767381440493394, - "loss": 3.4028, - "step": 24425 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003766586129593562, - "loss": 3.4257, - "step": 24430 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003765790761050383, - "loss": 3.4178, - "step": 24435 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003764995334923662, - "loss": 3.4097, - "step": 24440 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003764199851273213, - "loss": 3.4026, - "step": 24445 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003763404310158851, - "loss": 3.4466, - "step": 24450 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037626087116403973, - "loss": 3.3257, - "step": 24455 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037618130557776774, - "loss": 3.4463, - "step": 24460 - }, - { - "epoch": 0.42, - "learning_rate": 0.000376101734263052, - "loss": 3.2616, - "step": 24465 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037602215722587587, - "loss": 3.3278, - "step": 24470 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003759425744722231, - "loss": 3.3324, - "step": 24475 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037586298600807795, - "loss": 3.3937, - "step": 24480 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037578339183942503, - "loss": 3.3143, - "step": 24485 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037570379197224935, - "loss": 3.3917, - "step": 24490 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003756241864125365, - "loss": 3.3902, - "step": 24495 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003755445751662725, - "loss": 3.4043, - "step": 24500 - }, - { - "epoch": 0.42, - "eval_loss": 3.4218356609344482, - "eval_runtime": 149.9736, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 24500 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003754649582394435, - "loss": 3.3665, - "step": 24505 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003753853356380365, - "loss": 3.4224, - "step": 24510 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037530570736803853, - "loss": 3.2309, - "step": 24515 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003752260734354374, - "loss": 3.32, - "step": 24520 - }, - { - "epoch": 0.42, - "learning_rate": 0.000375146433846221, - "loss": 3.3945, - "step": 24525 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003750667886063778, - "loss": 3.4484, - "step": 24530 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037498713772189695, - "loss": 3.3952, - "step": 24535 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003749074811987676, - "loss": 3.4158, - "step": 24540 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037482781904297954, - "loss": 3.45, - "step": 24545 - }, - { - "epoch": 0.42, - "learning_rate": 0.00037474815126052304, - "loss": 3.3781, - "step": 24550 - }, - { - "epoch": 0.42, - "learning_rate": 0.0003746684778573885, - "loss": 3.285, - "step": 24555 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003745887988395672, - "loss": 3.3847, - "step": 24560 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037450911421305056, - "loss": 3.4632, - "step": 24565 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037442942398383027, - "loss": 3.364, - "step": 24570 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003743497281578987, - "loss": 3.3277, - "step": 24575 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003742700267412487, - "loss": 3.4421, - "step": 24580 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037419031973987335, - "loss": 3.3185, - "step": 24585 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037411060715976613, - "loss": 3.45, - "step": 24590 - }, - { - "epoch": 0.43, - "learning_rate": 0.000374030889006921, - "loss": 3.4009, - "step": 24595 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003739511652873325, - "loss": 3.4462, - "step": 24600 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003738714360069954, - "loss": 3.2398, - "step": 24605 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037379170117190477, - "loss": 3.4085, - "step": 24610 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003737119607880565, - "loss": 3.4324, - "step": 24615 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037363221486144644, - "loss": 3.4352, - "step": 24620 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003735524633980713, - "loss": 3.451, - "step": 24625 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037347270640392783, - "loss": 3.3561, - "step": 24630 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003733929438850133, - "loss": 3.3655, - "step": 24635 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003733131758473256, - "loss": 3.4145, - "step": 24640 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003732334022968628, - "loss": 3.4254, - "step": 24645 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003731536232396235, - "loss": 3.341, - "step": 24650 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003730738386816065, - "loss": 3.2968, - "step": 24655 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037299404862881145, - "loss": 3.3911, - "step": 24660 - }, - { - "epoch": 0.43, - "learning_rate": 0.000372914253087238, - "loss": 3.3917, - "step": 24665 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037283445206288637, - "loss": 3.3416, - "step": 24670 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037275464556175725, - "loss": 3.3614, - "step": 24675 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003726748335898516, - "loss": 3.3741, - "step": 24680 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037259501615317096, - "loss": 3.343, - "step": 24685 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003725151932577172, - "loss": 3.3623, - "step": 24690 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003724353649094924, - "loss": 3.4274, - "step": 24695 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003723555311144995, - "loss": 3.3472, - "step": 24700 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037227569187874146, - "loss": 3.4346, - "step": 24705 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003721958472082217, - "loss": 3.5295, - "step": 24710 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003721159971089443, - "loss": 3.5022, - "step": 24715 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037203614158691354, - "loss": 3.3116, - "step": 24720 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003719562806481341, - "loss": 3.2844, - "step": 24725 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003718764142986111, - "loss": 3.4344, - "step": 24730 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003717965425443501, - "loss": 3.4131, - "step": 24735 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037171666539135705, - "loss": 3.3643, - "step": 24740 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037163678284563836, - "loss": 3.2979, - "step": 24745 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037155689491320066, - "loss": 3.358, - "step": 24750 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037147700160005125, - "loss": 3.4448, - "step": 24755 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003713971029121975, - "loss": 3.3963, - "step": 24760 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003713171988556478, - "loss": 3.2131, - "step": 24765 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003712372894364099, - "loss": 3.392, - "step": 24770 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037115737466049306, - "loss": 3.3659, - "step": 24775 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003710774545339064, - "loss": 3.3049, - "step": 24780 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037099752906265943, - "loss": 3.3554, - "step": 24785 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037091759825276207, - "loss": 3.318, - "step": 24790 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037083766211022467, - "loss": 3.2913, - "step": 24795 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037075772064105815, - "loss": 3.4315, - "step": 24800 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037067777385127383, - "loss": 3.3689, - "step": 24805 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037059782174688296, - "loss": 3.4208, - "step": 24810 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037051786433389775, - "loss": 3.2799, - "step": 24815 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003704379016183306, - "loss": 3.5046, - "step": 24820 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037035793360619423, - "loss": 3.4735, - "step": 24825 - }, - { - "epoch": 0.43, - "learning_rate": 0.00037027796030350183, - "loss": 3.3752, - "step": 24830 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003701979817162669, - "loss": 3.4892, - "step": 24835 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003701179978505035, - "loss": 3.2982, - "step": 24840 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003700380087122262, - "loss": 3.5165, - "step": 24845 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003699580143074494, - "loss": 3.3722, - "step": 24850 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036987801464218853, - "loss": 3.456, - "step": 24855 - }, - { - "epoch": 0.43, - "learning_rate": 0.000369798009722459, - "loss": 3.3965, - "step": 24860 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036971799955427695, - "loss": 3.4197, - "step": 24865 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003696379841436586, - "loss": 3.3727, - "step": 24870 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036955796349662066, - "loss": 3.3547, - "step": 24875 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036947793761918043, - "loss": 3.3268, - "step": 24880 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036939790651735536, - "loss": 3.4494, - "step": 24885 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003693178701971633, - "loss": 3.3767, - "step": 24890 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003692378286646227, - "loss": 3.4077, - "step": 24895 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036915778192575223, - "loss": 3.4866, - "step": 24900 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036907772998657107, - "loss": 3.2929, - "step": 24905 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036899767285309846, - "loss": 3.5017, - "step": 24910 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003689176105313545, - "loss": 3.3838, - "step": 24915 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003688375430273595, - "loss": 3.2769, - "step": 24920 - }, - { - "epoch": 0.43, - "learning_rate": 0.000368757470347134, - "loss": 3.3815, - "step": 24925 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003686773924966992, - "loss": 3.381, - "step": 24930 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036859730948207627, - "loss": 3.3977, - "step": 24935 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003685172213092873, - "loss": 3.5147, - "step": 24940 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003684371279843546, - "loss": 3.3731, - "step": 24945 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003683570295133004, - "loss": 3.4501, - "step": 24950 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003682769259021479, - "loss": 3.3245, - "step": 24955 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036819681715692063, - "loss": 3.4224, - "step": 24960 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036811670328364214, - "loss": 3.3934, - "step": 24965 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003680365842883367, - "loss": 3.3077, - "step": 24970 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003679564601770287, - "loss": 3.3332, - "step": 24975 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003678763309557432, - "loss": 3.3636, - "step": 24980 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036779619663050554, - "loss": 3.3194, - "step": 24985 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036771605720734127, - "loss": 3.487, - "step": 24990 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036763591269227663, - "loss": 3.3416, - "step": 24995 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003675557630913379, - "loss": 3.3194, - "step": 25000 - }, - { - "epoch": 0.43, - "eval_loss": 3.4098989963531494, - "eval_runtime": 149.8725, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 25000 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036747560841055206, - "loss": 3.4664, - "step": 25005 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036739544865594623, - "loss": 3.3433, - "step": 25010 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036731528383354803, - "loss": 3.3198, - "step": 25015 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003672351139493855, - "loss": 3.3991, - "step": 25020 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036715493900948704, - "loss": 3.319, - "step": 25025 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036707475901988124, - "loss": 3.3713, - "step": 25030 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003669945739865974, - "loss": 3.34, - "step": 25035 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003669143839156648, - "loss": 3.4226, - "step": 25040 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036683418881311355, - "loss": 3.3767, - "step": 25045 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036675398868497394, - "loss": 3.4798, - "step": 25050 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003666737835372763, - "loss": 3.3018, - "step": 25055 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003665935733760519, - "loss": 3.4508, - "step": 25060 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036651335820733213, - "loss": 3.351, - "step": 25065 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003664331380371486, - "loss": 3.4597, - "step": 25070 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036635291287153365, - "loss": 3.3536, - "step": 25075 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036627268271651955, - "loss": 3.4136, - "step": 25080 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036619244757813953, - "loss": 3.24, - "step": 25085 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003661122074624266, - "loss": 3.3397, - "step": 25090 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036603196237541445, - "loss": 3.3929, - "step": 25095 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036595171232313715, - "loss": 3.3473, - "step": 25100 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003658714573116292, - "loss": 3.3839, - "step": 25105 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003657911973469252, - "loss": 3.3912, - "step": 25110 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003657109324350602, - "loss": 3.2797, - "step": 25115 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003656306625820699, - "loss": 3.3835, - "step": 25120 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003655503877939902, - "loss": 3.3576, - "step": 25125 - }, - { - "epoch": 0.43, - "learning_rate": 0.00036547010807685726, - "loss": 3.4066, - "step": 25130 - }, - { - "epoch": 0.43, - "learning_rate": 0.0003653898234367078, - "loss": 3.4692, - "step": 25135 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003653095338795786, - "loss": 3.3662, - "step": 25140 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003652292394115072, - "loss": 3.2443, - "step": 25145 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036514894003853126, - "loss": 3.4306, - "step": 25150 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036506863576668894, - "loss": 3.3949, - "step": 25155 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003649883266020187, - "loss": 3.3633, - "step": 25160 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003649080125505593, - "loss": 3.4211, - "step": 25165 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036482769361835, - "loss": 3.3037, - "step": 25170 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003647473698114305, - "loss": 3.2987, - "step": 25175 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003646670411358405, - "loss": 3.32, - "step": 25180 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036458670759762044, - "loss": 3.3066, - "step": 25185 - }, - { - "epoch": 0.44, - "learning_rate": 0.000364506369202811, - "loss": 3.446, - "step": 25190 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036442602595745314, - "loss": 3.4656, - "step": 25195 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036434567786758836, - "loss": 3.3859, - "step": 25200 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003642653249392584, - "loss": 3.363, - "step": 25205 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003641849671785053, - "loss": 3.3929, - "step": 25210 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003641046045913716, - "loss": 3.3327, - "step": 25215 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003640242371839002, - "loss": 3.4705, - "step": 25220 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036394386496213423, - "loss": 3.4551, - "step": 25225 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036386348793211737, - "loss": 3.4754, - "step": 25230 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003637831060998935, - "loss": 3.341, - "step": 25235 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036370271947150693, - "loss": 3.4458, - "step": 25240 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003636223280530024, - "loss": 3.3916, - "step": 25245 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003635419318504248, - "loss": 3.3774, - "step": 25250 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036346153086981955, - "loss": 3.3418, - "step": 25255 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036338112511723235, - "loss": 3.2995, - "step": 25260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036330071459870946, - "loss": 3.318, - "step": 25265 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036322029932029726, - "loss": 3.3669, - "step": 25270 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036313987928804256, - "loss": 3.4883, - "step": 25275 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036305945450799243, - "loss": 3.3465, - "step": 25280 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003629790249861946, - "loss": 3.3789, - "step": 25285 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036289859072869687, - "loss": 3.408, - "step": 25290 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003628181517415473, - "loss": 3.3993, - "step": 25295 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003627377080307947, - "loss": 3.323, - "step": 25300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036265725960248806, - "loss": 3.3462, - "step": 25305 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003625768064626766, - "loss": 3.3265, - "step": 25310 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003624963486174101, - "loss": 3.2759, - "step": 25315 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036241588607273823, - "loss": 3.4092, - "step": 25320 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003623354188347118, - "loss": 3.2942, - "step": 25325 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036225494690938126, - "loss": 3.3437, - "step": 25330 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036217447030279773, - "loss": 3.3779, - "step": 25335 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003620939890210128, - "loss": 3.3958, - "step": 25340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036201350307007795, - "loss": 3.438, - "step": 25345 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003619330124560456, - "loss": 3.4422, - "step": 25350 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003618525171849681, - "loss": 3.3307, - "step": 25355 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003617720172628982, - "loss": 3.4132, - "step": 25360 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003616915126958893, - "loss": 3.3374, - "step": 25365 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003616110034899948, - "loss": 3.3578, - "step": 25370 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003615304896512685, - "loss": 3.4068, - "step": 25375 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003614499711857648, - "loss": 3.2033, - "step": 25380 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036136944809953816, - "loss": 3.3313, - "step": 25385 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003612889203986436, - "loss": 3.2963, - "step": 25390 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036120838808913625, - "loss": 3.3687, - "step": 25395 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003611278511770718, - "loss": 3.3163, - "step": 25400 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036104730966850625, - "loss": 3.4108, - "step": 25405 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003609667635694959, - "loss": 3.4129, - "step": 25410 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003608862128860973, - "loss": 3.3316, - "step": 25415 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036080565762436757, - "loss": 3.2811, - "step": 25420 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036072509779036397, - "loss": 3.4021, - "step": 25425 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036064453339014436, - "loss": 3.4404, - "step": 25430 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036056396442976657, - "loss": 3.4088, - "step": 25435 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036048339091528895, - "loss": 3.3295, - "step": 25440 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003604028128527704, - "loss": 3.4301, - "step": 25445 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036032223024826983, - "loss": 3.3048, - "step": 25450 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003602416431078468, - "loss": 3.2633, - "step": 25455 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003601610514375608, - "loss": 3.3771, - "step": 25460 - }, - { - "epoch": 0.44, - "learning_rate": 0.00036008045524347213, - "loss": 3.3616, - "step": 25465 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035999985453164115, - "loss": 3.3544, - "step": 25470 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035991924930812855, - "loss": 3.3366, - "step": 25475 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003598386395789955, - "loss": 3.405, - "step": 25480 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003597580253503034, - "loss": 3.3767, - "step": 25485 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003596774066281141, - "loss": 3.2993, - "step": 25490 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035959678341848964, - "loss": 3.4348, - "step": 25495 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035951615572749247, - "loss": 3.2641, - "step": 25500 - }, - { - "epoch": 0.44, - "eval_loss": 3.403398036956787, - "eval_runtime": 149.6768, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 25500 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035943552356118547, - "loss": 3.3219, - "step": 25505 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035935488692563167, - "loss": 3.3383, - "step": 25510 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003592742458268945, - "loss": 3.3004, - "step": 25515 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035919360027103785, - "loss": 3.2707, - "step": 25520 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003591129502641259, - "loss": 3.3976, - "step": 25525 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035903229581222293, - "loss": 3.3484, - "step": 25530 - }, - { - "epoch": 0.44, - "learning_rate": 0.000358951636921394, - "loss": 3.3598, - "step": 25535 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035887097359770393, - "loss": 3.258, - "step": 25540 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003587903058472184, - "loss": 3.3335, - "step": 25545 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035870963367600324, - "loss": 3.3303, - "step": 25550 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035862895709012443, - "loss": 3.4024, - "step": 25555 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003585482760956486, - "loss": 3.3626, - "step": 25560 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003584675906986423, - "loss": 3.3502, - "step": 25565 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035838690090517303, - "loss": 3.4236, - "step": 25570 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003583062067213079, - "loss": 3.3696, - "step": 25575 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035822550815311485, - "loss": 3.3772, - "step": 25580 - }, - { - "epoch": 0.44, - "learning_rate": 0.000358144805206662, - "loss": 3.445, - "step": 25585 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035806409788801785, - "loss": 3.4372, - "step": 25590 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003579833862032511, - "loss": 3.4523, - "step": 25595 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035790267015843076, - "loss": 3.3395, - "step": 25600 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003578219497596264, - "loss": 3.3512, - "step": 25605 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003577412250129079, - "loss": 3.3397, - "step": 25610 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003576604959243451, - "loss": 3.3981, - "step": 25615 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035757976250000846, - "loss": 3.3503, - "step": 25620 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003574990247459688, - "loss": 3.3889, - "step": 25625 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003574182826682972, - "loss": 3.3094, - "step": 25630 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035733753627306496, - "loss": 3.3504, - "step": 25635 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003572567855663439, - "loss": 3.3083, - "step": 25640 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003571760305542059, - "loss": 3.4901, - "step": 25645 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003570952712427235, - "loss": 3.3134, - "step": 25650 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003570145076379693, - "loss": 3.4654, - "step": 25655 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003569337397460164, - "loss": 3.347, - "step": 25660 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003568529675729379, - "loss": 3.4081, - "step": 25665 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035677219112480765, - "loss": 3.381, - "step": 25670 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003566914104076996, - "loss": 3.2434, - "step": 25675 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035661062542768804, - "loss": 3.4147, - "step": 25680 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035652983619084754, - "loss": 3.3619, - "step": 25685 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035644904270325315, - "loss": 3.3556, - "step": 25690 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035636824497098, - "loss": 3.362, - "step": 25695 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035628744300010367, - "loss": 3.3565, - "step": 25700 - }, - { - "epoch": 0.44, - "learning_rate": 0.00035620663679670014, - "loss": 3.3171, - "step": 25705 - }, - { - "epoch": 0.44, - "learning_rate": 0.0003561258263668456, - "loss": 3.3843, - "step": 25710 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003560450117166165, - "loss": 3.4695, - "step": 25715 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003559641928520897, - "loss": 3.432, - "step": 25720 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003558833697793425, - "loss": 3.4059, - "step": 25725 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035580254250445234, - "loss": 3.3132, - "step": 25730 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003557217110334969, - "loss": 3.4028, - "step": 25735 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003556408753725543, - "loss": 3.3004, - "step": 25740 - }, - { - "epoch": 0.45, - "learning_rate": 0.000355560035527703, - "loss": 3.3349, - "step": 25745 - }, - { - "epoch": 0.45, - "learning_rate": 0.000355479191505022, - "loss": 3.3751, - "step": 25750 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035539834331058996, - "loss": 3.3602, - "step": 25755 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035531749095048633, - "loss": 3.3683, - "step": 25760 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035523663443079104, - "loss": 3.4222, - "step": 25765 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035515577375758384, - "loss": 3.3746, - "step": 25770 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035507490893694517, - "loss": 3.3384, - "step": 25775 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035499403997495555, - "loss": 3.3536, - "step": 25780 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035491316687769596, - "loss": 3.3968, - "step": 25785 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003548322896512477, - "loss": 3.3728, - "step": 25790 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035475140830169225, - "loss": 3.2715, - "step": 25795 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003546705228351115, - "loss": 3.4149, - "step": 25800 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003545896332575876, - "loss": 3.4619, - "step": 25805 - }, - { - "epoch": 0.45, - "learning_rate": 0.000354508739575203, - "loss": 3.3945, - "step": 25810 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003544278417940406, - "loss": 3.204, - "step": 25815 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003543469399201834, - "loss": 3.3092, - "step": 25820 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003542660339597149, - "loss": 3.3261, - "step": 25825 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003541851239187188, - "loss": 3.2748, - "step": 25830 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003541042098032789, - "loss": 3.2542, - "step": 25835 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003540232916194798, - "loss": 3.3005, - "step": 25840 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035394236937340604, - "loss": 3.2868, - "step": 25845 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035386144307114257, - "loss": 3.3108, - "step": 25850 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003537805127187746, - "loss": 3.3181, - "step": 25855 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003536995783223876, - "loss": 3.4812, - "step": 25860 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035361863988806765, - "loss": 3.369, - "step": 25865 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035353769742190073, - "loss": 3.3039, - "step": 25870 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003534567509299733, - "loss": 3.3852, - "step": 25875 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003533758004183722, - "loss": 3.4023, - "step": 25880 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003532948458931844, - "loss": 3.4037, - "step": 25885 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003532138873604974, - "loss": 3.416, - "step": 25890 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035313292482639876, - "loss": 3.3859, - "step": 25895 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003530519582969765, - "loss": 3.2529, - "step": 25900 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035297098777831886, - "loss": 3.3192, - "step": 25905 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035289001327651445, - "loss": 3.3498, - "step": 25910 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035280903479765205, - "loss": 3.4666, - "step": 25915 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035272805234782075, - "loss": 3.3699, - "step": 25920 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035264706593311025, - "loss": 3.2185, - "step": 25925 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035256607555961023, - "loss": 3.4041, - "step": 25930 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035248508123341066, - "loss": 3.2361, - "step": 25935 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035240408296060205, - "loss": 3.289, - "step": 25940 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003523230807472749, - "loss": 3.4608, - "step": 25945 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035224207459952025, - "loss": 3.3191, - "step": 25950 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003521610645234294, - "loss": 3.4509, - "step": 25955 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035208005052509366, - "loss": 3.3884, - "step": 25960 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035199903261060516, - "loss": 3.2898, - "step": 25965 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035191801078605586, - "loss": 3.3371, - "step": 25970 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035183698505753815, - "loss": 3.3688, - "step": 25975 - }, - { - "epoch": 0.45, - "learning_rate": 0.000351755955431145, - "loss": 3.3285, - "step": 25980 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035167492191296904, - "loss": 3.3297, - "step": 25985 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003515938845091039, - "loss": 3.3815, - "step": 25990 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035151284322564307, - "loss": 3.3746, - "step": 25995 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003514317980686803, - "loss": 3.3711, - "step": 26000 - }, - { - "epoch": 0.45, - "eval_loss": 3.395720958709717, - "eval_runtime": 149.6727, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 26000 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003513507490443101, - "loss": 3.3657, - "step": 26005 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003512696961586266, - "loss": 3.3346, - "step": 26010 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035118863941772477, - "loss": 3.4844, - "step": 26015 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035110757882769955, - "loss": 3.3899, - "step": 26020 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003510265143946463, - "loss": 3.399, - "step": 26025 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035094544612466073, - "loss": 3.2881, - "step": 26030 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003508643740238387, - "loss": 3.4151, - "step": 26035 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035078329809827633, - "loss": 3.2765, - "step": 26040 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003507022183540703, - "loss": 3.4003, - "step": 26045 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003506211347973173, - "loss": 3.4405, - "step": 26050 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035054004743411435, - "loss": 3.3197, - "step": 26055 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003504589562705589, - "loss": 3.27, - "step": 26060 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035037786131274836, - "loss": 3.335, - "step": 26065 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035029676256678104, - "loss": 3.2878, - "step": 26070 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003502156600387549, - "loss": 3.3474, - "step": 26075 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003501345537347684, - "loss": 3.4218, - "step": 26080 - }, - { - "epoch": 0.45, - "learning_rate": 0.00035005344366092047, - "loss": 3.4272, - "step": 26085 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003499723298233101, - "loss": 3.3154, - "step": 26090 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034989121222803665, - "loss": 3.3007, - "step": 26095 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003498100908811996, - "loss": 3.5048, - "step": 26100 - }, - { - "epoch": 0.45, - "learning_rate": 0.000349728965788899, - "loss": 3.411, - "step": 26105 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034964783695723515, - "loss": 3.3317, - "step": 26110 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034956670439230836, - "loss": 3.424, - "step": 26115 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003494855681002194, - "loss": 3.3243, - "step": 26120 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034940442808706926, - "loss": 3.4425, - "step": 26125 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034932328435895946, - "loss": 3.413, - "step": 26130 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003492421369219914, - "loss": 3.4129, - "step": 26135 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034916098578226696, - "loss": 3.4568, - "step": 26140 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034907983094588846, - "loss": 3.2578, - "step": 26145 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003489986724189581, - "loss": 3.4228, - "step": 26150 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003489175102075787, - "loss": 3.2936, - "step": 26155 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034883634431785325, - "loss": 3.4025, - "step": 26160 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003487551747558849, - "loss": 3.308, - "step": 26165 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003486740015277774, - "loss": 3.351, - "step": 26170 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003485928246396344, - "loss": 3.2869, - "step": 26175 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034851164409756, - "loss": 3.3198, - "step": 26180 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003484304599076586, - "loss": 3.3552, - "step": 26185 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034834927207603474, - "loss": 3.4593, - "step": 26190 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034826808060879356, - "loss": 3.3524, - "step": 26195 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034818688551204, - "loss": 3.2367, - "step": 26200 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003481056867918795, - "loss": 3.2815, - "step": 26205 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003480244844544181, - "loss": 3.4033, - "step": 26210 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003479432785057615, - "loss": 3.3144, - "step": 26215 - }, - { - "epoch": 0.45, - "learning_rate": 0.000347862068952016, - "loss": 3.3572, - "step": 26220 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034778085579928827, - "loss": 3.3663, - "step": 26225 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034769963905368495, - "loss": 3.3651, - "step": 26230 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003476184187213135, - "loss": 3.3908, - "step": 26235 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034753719480828076, - "loss": 3.3412, - "step": 26240 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034745596732069456, - "loss": 3.1838, - "step": 26245 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034737473626466296, - "loss": 3.3703, - "step": 26250 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034729350164629393, - "loss": 3.4167, - "step": 26255 - }, - { - "epoch": 0.45, - "learning_rate": 0.000347212263471696, - "loss": 3.445, - "step": 26260 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034713102174697773, - "loss": 3.3665, - "step": 26265 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003470497764782483, - "loss": 3.4791, - "step": 26270 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003469685276716167, - "loss": 3.327, - "step": 26275 - }, - { - "epoch": 0.45, - "learning_rate": 0.00034688727533319254, - "loss": 3.479, - "step": 26280 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003468060194690856, - "loss": 3.2287, - "step": 26285 - }, - { - "epoch": 0.45, - "learning_rate": 0.0003467247600854059, - "loss": 3.3918, - "step": 26290 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003466434971882636, - "loss": 3.511, - "step": 26295 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003465622307837695, - "loss": 3.3726, - "step": 26300 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003464809608780341, - "loss": 3.2298, - "step": 26305 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003463996874771688, - "loss": 3.3942, - "step": 26310 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034631841058728474, - "loss": 3.4149, - "step": 26315 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003462371302144935, - "loss": 3.3502, - "step": 26320 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034615584636490715, - "loss": 3.2583, - "step": 26325 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034607455904463766, - "loss": 3.357, - "step": 26330 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003459932682597974, - "loss": 3.3674, - "step": 26335 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034591197401649914, - "loss": 3.4351, - "step": 26340 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003458306763208557, - "loss": 3.3208, - "step": 26345 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034574937517898034, - "loss": 3.3437, - "step": 26350 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034566807059698647, - "loss": 3.4414, - "step": 26355 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003455867625809876, - "loss": 3.3091, - "step": 26360 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034550545113709786, - "loss": 3.3744, - "step": 26365 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003454241362714314, - "loss": 3.354, - "step": 26370 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003453428179901028, - "loss": 3.3764, - "step": 26375 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003452614962992265, - "loss": 3.345, - "step": 26380 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034518017120491765, - "loss": 3.3632, - "step": 26385 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003450988427132915, - "loss": 3.3498, - "step": 26390 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003450175108304636, - "loss": 3.3696, - "step": 26395 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034493617556254953, - "loss": 3.3475, - "step": 26400 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034485483691566537, - "loss": 3.3516, - "step": 26405 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003447734948959273, - "loss": 3.3455, - "step": 26410 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003446921495094519, - "loss": 3.3736, - "step": 26415 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034461080076235597, - "loss": 3.4373, - "step": 26420 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003445294486607564, - "loss": 3.3134, - "step": 26425 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034444809321077045, - "loss": 3.3514, - "step": 26430 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003443667344185157, - "loss": 3.4766, - "step": 26435 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034428537229011, - "loss": 3.3528, - "step": 26440 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003442040068316711, - "loss": 3.3033, - "step": 26445 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003441226380493174, - "loss": 3.4208, - "step": 26450 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003440412659491677, - "loss": 3.3896, - "step": 26455 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034395989053734034, - "loss": 3.3191, - "step": 26460 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003438785118199545, - "loss": 3.3925, - "step": 26465 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003437971298031295, - "loss": 3.3533, - "step": 26470 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034371574449298476, - "loss": 3.3886, - "step": 26475 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003436343558956401, - "loss": 3.3418, - "step": 26480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034355296401721545, - "loss": 3.4094, - "step": 26485 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003434715688638312, - "loss": 3.4429, - "step": 26490 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034339017044160766, - "loss": 3.3538, - "step": 26495 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003433087687566657, - "loss": 3.3811, - "step": 26500 - }, - { - "epoch": 0.46, - "eval_loss": 3.386625289916992, - "eval_runtime": 149.9818, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 26500 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003432273638151263, - "loss": 3.4065, - "step": 26505 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034314595562311064, - "loss": 3.3432, - "step": 26510 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003430645441867404, - "loss": 3.4248, - "step": 26515 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034298312951213696, - "loss": 3.3458, - "step": 26520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034290171160542243, - "loss": 3.4478, - "step": 26525 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003428202904727192, - "loss": 3.3292, - "step": 26530 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003427388661201495, - "loss": 3.4159, - "step": 26535 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034265743855383616, - "loss": 3.3999, - "step": 26540 - }, - { - "epoch": 0.46, - "learning_rate": 0.000342576007779902, - "loss": 3.4405, - "step": 26545 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003424945738044702, - "loss": 3.2289, - "step": 26550 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034241313663366436, - "loss": 3.3146, - "step": 26555 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034233169627360797, - "loss": 3.2682, - "step": 26560 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003422502527304249, - "loss": 3.401, - "step": 26565 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003421688060102394, - "loss": 3.4786, - "step": 26570 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003420873561191758, - "loss": 3.3598, - "step": 26575 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034200590306335873, - "loss": 3.3288, - "step": 26580 - }, - { - "epoch": 0.46, - "learning_rate": 0.000341924446848913, - "loss": 3.3177, - "step": 26585 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003418429874819637, - "loss": 3.3151, - "step": 26590 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003417615249686363, - "loss": 3.3541, - "step": 26595 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003416800593150561, - "loss": 3.3449, - "step": 26600 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034159859052734917, - "loss": 3.3314, - "step": 26605 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034151711861164135, - "loss": 3.3612, - "step": 26610 - }, - { - "epoch": 0.46, - "learning_rate": 0.000341435643574059, - "loss": 3.3928, - "step": 26615 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034135416542072864, - "loss": 3.3273, - "step": 26620 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034127268415777687, - "loss": 3.3225, - "step": 26625 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034119119979133085, - "loss": 3.3973, - "step": 26630 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034110971232751775, - "loss": 3.3591, - "step": 26635 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034102822177246487, - "loss": 3.1721, - "step": 26640 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003409467281323, - "loss": 3.1719, - "step": 26645 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034086523141315105, - "loss": 3.3968, - "step": 26650 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034078373162114615, - "loss": 3.3185, - "step": 26655 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034070222876241365, - "loss": 3.3102, - "step": 26660 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003406207228430821, - "loss": 3.4479, - "step": 26665 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034053921386928036, - "loss": 3.4227, - "step": 26670 - }, - { - "epoch": 0.46, - "learning_rate": 0.00034045770184713755, - "loss": 3.4028, - "step": 26675 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003403761867827829, - "loss": 3.4202, - "step": 26680 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003402946686823459, - "loss": 3.3627, - "step": 26685 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003402131475519563, - "loss": 3.4673, - "step": 26690 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003401316233977441, - "loss": 3.4651, - "step": 26695 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003400500962258395, - "loss": 3.2451, - "step": 26700 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003399685660423729, - "loss": 3.3571, - "step": 26705 - }, - { - "epoch": 0.46, - "learning_rate": 0.000339887032853475, - "loss": 3.3063, - "step": 26710 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003398054966652767, - "loss": 3.1844, - "step": 26715 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033972395748390905, - "loss": 3.3216, - "step": 26720 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033964241531550326, - "loss": 3.2759, - "step": 26725 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033956087016619104, - "loss": 3.2445, - "step": 26730 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003394793220421042, - "loss": 3.2716, - "step": 26735 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003393977709493746, - "loss": 3.4037, - "step": 26740 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033931621689413464, - "loss": 3.3809, - "step": 26745 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003392346598825165, - "loss": 3.3758, - "step": 26750 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033915309992065316, - "loss": 3.3075, - "step": 26755 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033907153701467733, - "loss": 3.463, - "step": 26760 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033898997117072206, - "loss": 3.3957, - "step": 26765 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033890840239492084, - "loss": 3.3882, - "step": 26770 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003388268306934072, - "loss": 3.3903, - "step": 26775 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003387452560723148, - "loss": 3.3708, - "step": 26780 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033866367853777784, - "loss": 3.3458, - "step": 26785 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033858209809593023, - "loss": 3.3112, - "step": 26790 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003385005147529067, - "loss": 3.2947, - "step": 26795 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003384189285148417, - "loss": 3.3433, - "step": 26800 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003383373393878702, - "loss": 3.3527, - "step": 26805 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033825574737812725, - "loss": 3.1773, - "step": 26810 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033817415249174824, - "loss": 3.3979, - "step": 26815 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003380925547348684, - "loss": 3.4185, - "step": 26820 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003380109541136238, - "loss": 3.3596, - "step": 26825 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033792935063415024, - "loss": 3.4408, - "step": 26830 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033784774430258393, - "loss": 3.2784, - "step": 26835 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003377661351250612, - "loss": 3.3703, - "step": 26840 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003376845231077186, - "loss": 3.3976, - "step": 26845 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033760290825669305, - "loss": 3.4119, - "step": 26850 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003375212905781215, - "loss": 3.4389, - "step": 26855 - }, - { - "epoch": 0.46, - "learning_rate": 0.00033743967007814123, - "loss": 3.4103, - "step": 26860 - }, - { - "epoch": 0.46, - "learning_rate": 0.0003373580467628895, - "loss": 3.3018, - "step": 26865 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003372764206385042, - "loss": 3.3944, - "step": 26870 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033719479171112317, - "loss": 3.3529, - "step": 26875 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033711315998688436, - "loss": 3.3456, - "step": 26880 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003370315254719261, - "loss": 3.3234, - "step": 26885 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033694988817238694, - "loss": 3.2187, - "step": 26890 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003368682480944056, - "loss": 3.4029, - "step": 26895 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033678660524412093, - "loss": 3.3446, - "step": 26900 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033670495962767193, - "loss": 3.3761, - "step": 26905 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033662331125119824, - "loss": 3.2874, - "step": 26910 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033654166012083916, - "loss": 3.2817, - "step": 26915 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033646000624273453, - "loss": 3.4034, - "step": 26920 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033637834962302435, - "loss": 3.4035, - "step": 26925 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003362966902678486, - "loss": 3.3829, - "step": 26930 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033621502818334786, - "loss": 3.3859, - "step": 26935 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003361333633756626, - "loss": 3.2862, - "step": 26940 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003360516958509335, - "loss": 3.3791, - "step": 26945 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003359700256153017, - "loss": 3.3683, - "step": 26950 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033588835267490827, - "loss": 3.3453, - "step": 26955 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003358066770358947, - "loss": 3.3683, - "step": 26960 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003357249987044025, - "loss": 3.3918, - "step": 26965 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003356433176865735, - "loss": 3.3034, - "step": 26970 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033556163398854966, - "loss": 3.3282, - "step": 26975 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003354799476164732, - "loss": 3.4101, - "step": 26980 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003353982585764865, - "loss": 3.3256, - "step": 26985 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003353165668747322, - "loss": 3.3154, - "step": 26990 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033523487251735303, - "loss": 3.2524, - "step": 26995 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033515317551049213, - "loss": 3.3979, - "step": 27000 - }, - { - "epoch": 0.47, - "eval_loss": 3.3817105293273926, - "eval_runtime": 149.7679, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 27000 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003350714758602924, - "loss": 3.3361, - "step": 27005 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003349897735728975, - "loss": 3.3722, - "step": 27010 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033490806865445097, - "loss": 3.4067, - "step": 27015 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003348263611110966, - "loss": 3.3635, - "step": 27020 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033474465094897836, - "loss": 3.4518, - "step": 27025 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003346629381742404, - "loss": 3.3886, - "step": 27030 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033458122279302706, - "loss": 3.3627, - "step": 27035 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003344995048114832, - "loss": 3.2823, - "step": 27040 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033441778423575315, - "loss": 3.3224, - "step": 27045 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033433606107198223, - "loss": 3.4366, - "step": 27050 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033425433532631546, - "loss": 3.2814, - "step": 27055 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003341726070048983, - "loss": 3.4437, - "step": 27060 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003340908761138762, - "loss": 3.3634, - "step": 27065 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033400914265939484, - "loss": 3.3136, - "step": 27070 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033392740664760033, - "loss": 3.2769, - "step": 27075 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003338456680846387, - "loss": 3.3517, - "step": 27080 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033376392697665624, - "loss": 3.299, - "step": 27085 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033368218332979955, - "loss": 3.3863, - "step": 27090 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003336004371502152, - "loss": 3.3336, - "step": 27095 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003335186884440503, - "loss": 3.2838, - "step": 27100 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033343693721745186, - "loss": 3.3958, - "step": 27105 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003333551834765669, - "loss": 3.2276, - "step": 27110 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033327342722754325, - "loss": 3.3027, - "step": 27115 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003331916684765284, - "loss": 3.3461, - "step": 27120 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033310990722967, - "loss": 3.4142, - "step": 27125 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033302814349311645, - "loss": 3.4698, - "step": 27130 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033294637727301565, - "loss": 3.3017, - "step": 27135 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003328646085755163, - "loss": 3.2897, - "step": 27140 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003327828374067667, - "loss": 3.4139, - "step": 27145 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033270106377291563, - "loss": 3.4206, - "step": 27150 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033261928768011235, - "loss": 3.3275, - "step": 27155 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003325375091345057, - "loss": 3.304, - "step": 27160 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003324557281422451, - "loss": 3.2816, - "step": 27165 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003323739447094802, - "loss": 3.3583, - "step": 27170 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003322921588423604, - "loss": 3.2299, - "step": 27175 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003322103705470359, - "loss": 3.2416, - "step": 27180 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033212857982965657, - "loss": 3.3558, - "step": 27185 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003320467866963726, - "loss": 3.3987, - "step": 27190 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003319649911533346, - "loss": 3.3418, - "step": 27195 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003318831932066931, - "loss": 3.3419, - "step": 27200 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033180139286259885, - "loss": 3.4227, - "step": 27205 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033171959012720276, - "loss": 3.3857, - "step": 27210 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033163778500665606, - "loss": 3.2885, - "step": 27215 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033155597750711023, - "loss": 3.3045, - "step": 27220 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033147416763471644, - "loss": 3.3942, - "step": 27225 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003313923553956266, - "loss": 3.3544, - "step": 27230 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003313105407959926, - "loss": 3.2217, - "step": 27235 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033122872384196627, - "loss": 3.2347, - "step": 27240 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003311469045397001, - "loss": 3.2675, - "step": 27245 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033106508289534615, - "loss": 3.274, - "step": 27250 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003309832589150573, - "loss": 3.3786, - "step": 27255 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033090143260498616, - "loss": 3.3703, - "step": 27260 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003308196039712856, - "loss": 3.2512, - "step": 27265 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033073777302010875, - "loss": 3.3575, - "step": 27270 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003306559397576089, - "loss": 3.2458, - "step": 27275 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003305741041899396, - "loss": 3.3806, - "step": 27280 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003304922663232543, - "loss": 3.3828, - "step": 27285 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033041042616370676, - "loss": 3.3612, - "step": 27290 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003303285837174511, - "loss": 3.2727, - "step": 27295 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033024673899064143, - "loss": 3.2924, - "step": 27300 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033016489198943184, - "loss": 3.3078, - "step": 27305 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003300830427199771, - "loss": 3.2827, - "step": 27310 - }, - { - "epoch": 0.47, - "learning_rate": 0.00033000119118843164, - "loss": 3.328, - "step": 27315 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003299193374009505, - "loss": 3.3192, - "step": 27320 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032983748136368836, - "loss": 3.4029, - "step": 27325 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032975562308280057, - "loss": 3.4059, - "step": 27330 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032967376256444247, - "loss": 3.3607, - "step": 27335 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032959189981476947, - "loss": 3.2867, - "step": 27340 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032951003483993734, - "loss": 3.3518, - "step": 27345 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032942816764610164, - "loss": 3.2867, - "step": 27350 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003293462982394187, - "loss": 3.3899, - "step": 27355 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003292644266260445, - "loss": 3.22, - "step": 27360 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003291825528121354, - "loss": 3.4481, - "step": 27365 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032910067680384786, - "loss": 3.2794, - "step": 27370 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032901879860733856, - "loss": 3.3851, - "step": 27375 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003289369182287644, - "loss": 3.3057, - "step": 27380 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032885503567428224, - "loss": 3.3024, - "step": 27385 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003287731509500492, - "loss": 3.3614, - "step": 27390 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003286912640622227, - "loss": 3.2706, - "step": 27395 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003286093750169603, - "loss": 3.2274, - "step": 27400 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003285274838204194, - "loss": 3.3175, - "step": 27405 - }, - { - "epoch": 0.47, - "learning_rate": 0.000328445590478758, - "loss": 3.4169, - "step": 27410 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032836369499813383, - "loss": 3.3511, - "step": 27415 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032828179738470534, - "loss": 3.4634, - "step": 27420 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032819989764463055, - "loss": 3.3259, - "step": 27425 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032811799578406794, - "loss": 3.3551, - "step": 27430 - }, - { - "epoch": 0.47, - "learning_rate": 0.0003280360918091762, - "loss": 3.2537, - "step": 27435 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032795418572611405, - "loss": 3.2446, - "step": 27440 - }, - { - "epoch": 0.47, - "learning_rate": 0.00032787227754104033, - "loss": 3.289, - "step": 27445 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032779036726011425, - "loss": 3.3049, - "step": 27450 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003277084548894949, - "loss": 3.3346, - "step": 27455 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032762654043534185, - "loss": 3.3805, - "step": 27460 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003275446239038145, - "loss": 3.343, - "step": 27465 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003274627053010725, - "loss": 3.2794, - "step": 27470 - }, - { - "epoch": 0.48, - "learning_rate": 0.000327380784633276, - "loss": 3.34, - "step": 27475 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032729886190658463, - "loss": 3.3971, - "step": 27480 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003272169371271589, - "loss": 3.3285, - "step": 27485 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003271350103011589, - "loss": 3.2486, - "step": 27490 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003270530814347452, - "loss": 3.4702, - "step": 27495 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032697115053407843, - "loss": 3.4144, - "step": 27500 - }, - { - "epoch": 0.48, - "eval_loss": 3.368710994720459, - "eval_runtime": 149.7624, - "eval_samples_per_second": 12.293, - "eval_steps_per_second": 0.775, - "step": 27500 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032688921760531936, - "loss": 3.3579, - "step": 27505 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032680728265462885, - "loss": 3.2252, - "step": 27510 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003267253456881681, - "loss": 3.3385, - "step": 27515 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032664340671209846, - "loss": 3.4116, - "step": 27520 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032656146573258114, - "loss": 3.3403, - "step": 27525 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003264795227557776, - "loss": 3.1292, - "step": 27530 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032639757778784965, - "loss": 3.3242, - "step": 27535 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003263156308349593, - "loss": 3.259, - "step": 27540 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032623368190326827, - "loss": 3.4236, - "step": 27545 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003261517309989389, - "loss": 3.3361, - "step": 27550 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032606977812813323, - "loss": 3.4113, - "step": 27555 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032598782329701397, - "loss": 3.2926, - "step": 27560 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003259058665117436, - "loss": 3.3761, - "step": 27565 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032582390777848464, - "loss": 3.371, - "step": 27570 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032574194710340037, - "loss": 3.455, - "step": 27575 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032565998449265353, - "loss": 3.3533, - "step": 27580 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003255780199524073, - "loss": 3.4031, - "step": 27585 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003254960534888252, - "loss": 3.3147, - "step": 27590 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003254140851080704, - "loss": 3.2974, - "step": 27595 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003253321148163067, - "loss": 3.3253, - "step": 27600 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032525014261969786, - "loss": 3.4219, - "step": 27605 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032516816852440756, - "loss": 3.3182, - "step": 27610 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003250861925366001, - "loss": 3.4381, - "step": 27615 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032500421466243946, - "loss": 3.2992, - "step": 27620 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032492223490809, - "loss": 3.3346, - "step": 27625 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003248402532797163, - "loss": 3.4011, - "step": 27630 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032475826978348274, - "loss": 3.3957, - "step": 27635 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003246762844255542, - "loss": 3.4597, - "step": 27640 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032459429721209564, - "loss": 3.418, - "step": 27645 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032451230814927183, - "loss": 3.2963, - "step": 27650 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003244303172432482, - "loss": 3.2607, - "step": 27655 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032434832450018987, - "loss": 3.3302, - "step": 27660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032426632992626234, - "loss": 3.3218, - "step": 27665 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032418433352763106, - "loss": 3.2973, - "step": 27670 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032410233531046185, - "loss": 3.2454, - "step": 27675 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003240203352809206, - "loss": 3.4339, - "step": 27680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032393833344517334, - "loss": 3.3274, - "step": 27685 - }, - { - "epoch": 0.48, - "learning_rate": 0.000323856329809386, - "loss": 3.2768, - "step": 27690 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003237743243797249, - "loss": 3.2868, - "step": 27695 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003236923171623564, - "loss": 3.3539, - "step": 27700 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003236103081634472, - "loss": 3.2585, - "step": 27705 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032352829738916365, - "loss": 3.279, - "step": 27710 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003234462848456729, - "loss": 3.3725, - "step": 27715 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003233642705391416, - "loss": 3.3193, - "step": 27720 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003232822544757369, - "loss": 3.2873, - "step": 27725 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032320023666162606, - "loss": 3.2741, - "step": 27730 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003231182171029761, - "loss": 3.3445, - "step": 27735 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003230361958059549, - "loss": 3.1675, - "step": 27740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032295417277672985, - "loss": 3.3555, - "step": 27745 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032287214802146854, - "loss": 3.3363, - "step": 27750 - }, - { - "epoch": 0.48, - "learning_rate": 0.000322790121546339, - "loss": 3.3608, - "step": 27755 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032270809335750895, - "loss": 3.3135, - "step": 27760 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032262606346114683, - "loss": 3.3241, - "step": 27765 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032254403186342067, - "loss": 3.3791, - "step": 27770 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032246199857049877, - "loss": 3.2657, - "step": 27775 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003223799635885498, - "loss": 3.3029, - "step": 27780 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003222979269237423, - "loss": 3.3314, - "step": 27785 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003222158885822447, - "loss": 3.1889, - "step": 27790 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032213384857022635, - "loss": 3.3532, - "step": 27795 - }, - { - "epoch": 0.48, - "learning_rate": 0.000322051806893856, - "loss": 3.2229, - "step": 27800 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003219697635593029, - "loss": 3.2576, - "step": 27805 - }, - { - "epoch": 0.48, - "learning_rate": 0.000321887718572736, - "loss": 3.4196, - "step": 27810 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032180567194032476, - "loss": 3.2642, - "step": 27815 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003217236236682389, - "loss": 3.3044, - "step": 27820 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003216415737626479, - "loss": 3.277, - "step": 27825 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003215595222297214, - "loss": 3.3251, - "step": 27830 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003214774690756293, - "loss": 3.291, - "step": 27835 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003213954143065416, - "loss": 3.3613, - "step": 27840 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032131335792862846, - "loss": 3.3399, - "step": 27845 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032123129994805993, - "loss": 3.2345, - "step": 27850 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003211492403710065, - "loss": 3.2515, - "step": 27855 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032106717920363855, - "loss": 3.369, - "step": 27860 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003209851164521268, - "loss": 3.2912, - "step": 27865 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032090305212264175, - "loss": 3.355, - "step": 27870 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032082098622135424, - "loss": 3.3553, - "step": 27875 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003207389187544354, - "loss": 3.3148, - "step": 27880 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003206568497280562, - "loss": 3.2627, - "step": 27885 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003205747791483876, - "loss": 3.388, - "step": 27890 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003204927070216011, - "loss": 3.2773, - "step": 27895 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003204106333538681, - "loss": 3.363, - "step": 27900 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032032855815136, - "loss": 3.442, - "step": 27905 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032024648142024866, - "loss": 3.2348, - "step": 27910 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032016440316670554, - "loss": 3.2679, - "step": 27915 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003200823233969027, - "loss": 3.2793, - "step": 27920 - }, - { - "epoch": 0.48, - "learning_rate": 0.00032000024211701207, - "loss": 3.2952, - "step": 27925 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031991815933320574, - "loss": 3.1779, - "step": 27930 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031983607505165585, - "loss": 3.2989, - "step": 27935 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003197539892785348, - "loss": 3.3453, - "step": 27940 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003196719020200151, - "loss": 3.2617, - "step": 27945 - }, - { - "epoch": 0.48, - "learning_rate": 0.000319589813282269, - "loss": 3.4275, - "step": 27950 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031950772307146936, - "loss": 3.4124, - "step": 27955 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031942563139378906, - "loss": 3.4406, - "step": 27960 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031934353825540074, - "loss": 3.4792, - "step": 27965 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003192614436624774, - "loss": 3.3811, - "step": 27970 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031917934762119236, - "loss": 3.3476, - "step": 27975 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003190972501377186, - "loss": 3.366, - "step": 27980 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003190151512182296, - "loss": 3.3788, - "step": 27985 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031893305086889853, - "loss": 3.3044, - "step": 27990 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003188509490958991, - "loss": 3.3833, - "step": 27995 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031876884590540495, - "loss": 3.3097, - "step": 28000 - }, - { - "epoch": 0.48, - "eval_loss": 3.3711462020874023, - "eval_runtime": 149.7727, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 28000 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003186867413035898, - "loss": 3.2273, - "step": 28005 - }, - { - "epoch": 0.48, - "learning_rate": 0.0003186046352966274, - "loss": 3.2312, - "step": 28010 - }, - { - "epoch": 0.48, - "learning_rate": 0.00031852252789069184, - "loss": 3.3193, - "step": 28015 - }, - { - "epoch": 0.48, - "learning_rate": 0.000318440419091957, - "loss": 3.3245, - "step": 28020 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003183583089065973, - "loss": 3.3313, - "step": 28025 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031827619734078676, - "loss": 3.3352, - "step": 28030 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003181940844006999, - "loss": 3.3963, - "step": 28035 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031811197009251103, - "loss": 3.3619, - "step": 28040 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003180298544223949, - "loss": 3.3542, - "step": 28045 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031794773739652623, - "loss": 3.3766, - "step": 28050 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003178656190210795, - "loss": 3.4085, - "step": 28055 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003177834993022299, - "loss": 3.3648, - "step": 28060 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031770137824615227, - "loss": 3.4042, - "step": 28065 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003176192558590217, - "loss": 3.3609, - "step": 28070 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031753713214701334, - "loss": 3.4013, - "step": 28075 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031745500711630257, - "loss": 3.3054, - "step": 28080 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003173728807730647, - "loss": 3.373, - "step": 28085 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003172907531234752, - "loss": 3.3296, - "step": 28090 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031720862417370976, - "loss": 3.3439, - "step": 28095 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003171264939299439, - "loss": 3.3268, - "step": 28100 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031704436239835356, - "loss": 3.3792, - "step": 28105 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031696222958511443, - "loss": 3.3698, - "step": 28110 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031688009549640263, - "loss": 3.3603, - "step": 28115 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003167979601383941, - "loss": 3.2905, - "step": 28120 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031671582351726514, - "loss": 3.1407, - "step": 28125 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031663368563919183, - "loss": 3.3483, - "step": 28130 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003165515465103506, - "loss": 3.3586, - "step": 28135 - }, - { - "epoch": 0.49, - "learning_rate": 0.000316469406136918, - "loss": 3.3597, - "step": 28140 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003163872645250704, - "loss": 3.4144, - "step": 28145 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003163051216809845, - "loss": 3.3252, - "step": 28150 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031622297761083696, - "loss": 3.2158, - "step": 28155 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003161408323208047, - "loss": 3.268, - "step": 28160 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031605868581706457, - "loss": 3.2614, - "step": 28165 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003159765381057935, - "loss": 3.32, - "step": 28170 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031589438919316874, - "loss": 3.4281, - "step": 28175 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003158122390853673, - "loss": 3.4212, - "step": 28180 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003157300877885665, - "loss": 3.254, - "step": 28185 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031564793530894386, - "loss": 3.3829, - "step": 28190 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031556578165267647, - "loss": 3.2446, - "step": 28195 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031548362682594215, - "loss": 3.3505, - "step": 28200 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003154014708349186, - "loss": 3.3956, - "step": 28205 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003153193136857832, - "loss": 3.3643, - "step": 28210 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031523715538471394, - "loss": 3.2072, - "step": 28215 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003151549959378887, - "loss": 3.3384, - "step": 28220 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031507283535148553, - "loss": 3.3184, - "step": 28225 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003149906736316823, - "loss": 3.3089, - "step": 28230 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031490851078465717, - "loss": 3.2242, - "step": 28235 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031482634681658856, - "loss": 3.3506, - "step": 28240 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031474418173365465, - "loss": 3.2773, - "step": 28245 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003146620155420337, - "loss": 3.3073, - "step": 28250 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003145798482479044, - "loss": 3.3887, - "step": 28255 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031449767985744525, - "loss": 3.2862, - "step": 28260 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003144155103768349, - "loss": 3.3459, - "step": 28265 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031433333981225204, - "loss": 3.3802, - "step": 28270 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031425116816987536, - "loss": 3.3579, - "step": 28275 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031416899545588404, - "loss": 3.4191, - "step": 28280 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003140868216764568, - "loss": 3.3477, - "step": 28285 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003140046468377728, - "loss": 3.3956, - "step": 28290 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031392247094601104, - "loss": 3.4055, - "step": 28295 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031384029400735087, - "loss": 3.3708, - "step": 28300 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031375811602797155, - "loss": 3.4004, - "step": 28305 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031367593701405236, - "loss": 3.307, - "step": 28310 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031359375697177284, - "loss": 3.3867, - "step": 28315 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031351157590731244, - "loss": 3.3628, - "step": 28320 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003134293938268508, - "loss": 3.3526, - "step": 28325 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031334721073656763, - "loss": 3.2619, - "step": 28330 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003132650266426425, - "loss": 3.2303, - "step": 28335 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031318284155125534, - "loss": 3.3438, - "step": 28340 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031310065546858606, - "loss": 3.3175, - "step": 28345 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031301846840081475, - "loss": 3.348, - "step": 28350 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003129362803541213, - "loss": 3.4054, - "step": 28355 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031285409133468574, - "loss": 3.2905, - "step": 28360 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031277190134868855, - "loss": 3.3457, - "step": 28365 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031268971040230973, - "loss": 3.2979, - "step": 28370 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031260751850172974, - "loss": 3.3189, - "step": 28375 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031252532565312894, - "loss": 3.2012, - "step": 28380 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003124431318626879, - "loss": 3.2903, - "step": 28385 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031236093713658705, - "loss": 3.394, - "step": 28390 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003122787414810072, - "loss": 3.376, - "step": 28395 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003121965449021288, - "loss": 3.424, - "step": 28400 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003121143474061328, - "loss": 3.2631, - "step": 28405 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003120321489991999, - "loss": 3.3431, - "step": 28410 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003119499496875111, - "loss": 3.3405, - "step": 28415 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003118677494772473, - "loss": 3.288, - "step": 28420 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003117855483745896, - "loss": 3.3591, - "step": 28425 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003117033463857191, - "loss": 3.3612, - "step": 28430 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003116211435168168, - "loss": 3.2559, - "step": 28435 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031153893977406405, - "loss": 3.3729, - "step": 28440 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031145673516364225, - "loss": 3.3824, - "step": 28445 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031137452969173275, - "loss": 3.3824, - "step": 28450 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031129232336451676, - "loss": 3.3525, - "step": 28455 - }, - { - "epoch": 0.49, - "learning_rate": 0.000311210116188176, - "loss": 3.2539, - "step": 28460 - }, - { - "epoch": 0.49, - "learning_rate": 0.000311127908168892, - "loss": 3.2978, - "step": 28465 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003110456993128464, - "loss": 3.2967, - "step": 28470 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031096348962622063, - "loss": 3.352, - "step": 28475 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031088127911519666, - "loss": 3.3765, - "step": 28480 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031079906778595627, - "loss": 3.4548, - "step": 28485 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031071685564468146, - "loss": 3.2177, - "step": 28490 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031063464269755393, - "loss": 3.2067, - "step": 28495 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031055242895075574, - "loss": 3.2593, - "step": 28500 - }, - { - "epoch": 0.49, - "eval_loss": 3.359267473220825, - "eval_runtime": 149.9726, - "eval_samples_per_second": 12.276, - "eval_steps_per_second": 0.773, - "step": 28500 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031047021441046906, - "loss": 3.3636, - "step": 28505 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003103879990828759, - "loss": 3.3139, - "step": 28510 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031030578297415836, - "loss": 3.379, - "step": 28515 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003102235660904988, - "loss": 3.3304, - "step": 28520 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031014134843807944, - "loss": 3.3501, - "step": 28525 - }, - { - "epoch": 0.49, - "learning_rate": 0.00031005913002308275, - "loss": 3.3744, - "step": 28530 - }, - { - "epoch": 0.49, - "learning_rate": 0.000309976910851691, - "loss": 3.2913, - "step": 28535 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003098946909300866, - "loss": 3.2803, - "step": 28540 - }, - { - "epoch": 0.49, - "learning_rate": 0.00030981247026445224, - "loss": 3.3851, - "step": 28545 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003097302488609704, - "loss": 3.2995, - "step": 28550 - }, - { - "epoch": 0.49, - "learning_rate": 0.00030964802672582364, - "loss": 3.356, - "step": 28555 - }, - { - "epoch": 0.49, - "learning_rate": 0.00030956580386519477, - "loss": 3.2917, - "step": 28560 - }, - { - "epoch": 0.49, - "learning_rate": 0.00030948358028526644, - "loss": 3.3783, - "step": 28565 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003094013559922215, - "loss": 3.2218, - "step": 28570 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003093191309922428, - "loss": 3.163, - "step": 28575 - }, - { - "epoch": 0.49, - "learning_rate": 0.00030923690529151315, - "loss": 3.3771, - "step": 28580 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003091546788962156, - "loss": 3.3725, - "step": 28585 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003090724518125331, - "loss": 3.3931, - "step": 28590 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003089902240466487, - "loss": 3.3111, - "step": 28595 - }, - { - "epoch": 0.49, - "learning_rate": 0.0003089079956047456, - "loss": 3.3732, - "step": 28600 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030882576649300675, - "loss": 3.442, - "step": 28605 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003087435367176157, - "loss": 3.393, - "step": 28610 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003086613062847553, - "loss": 3.3046, - "step": 28615 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003085790752006091, - "loss": 3.3409, - "step": 28620 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003084968434713605, - "loss": 3.3822, - "step": 28625 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003084146111031927, - "loss": 3.2859, - "step": 28630 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003083323781022894, - "loss": 3.3541, - "step": 28635 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030825014447483383, - "loss": 3.286, - "step": 28640 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003081679102270098, - "loss": 3.3848, - "step": 28645 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030808567536500073, - "loss": 3.3427, - "step": 28650 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003080034398949903, - "loss": 3.2654, - "step": 28655 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003079212038231622, - "loss": 3.2767, - "step": 28660 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030783896715570015, - "loss": 3.2923, - "step": 28665 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030775672989878796, - "loss": 3.2185, - "step": 28670 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003076744920586095, - "loss": 3.317, - "step": 28675 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030759225364134846, - "loss": 3.2964, - "step": 28680 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003075100146531889, - "loss": 3.3482, - "step": 28685 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003074277751003148, - "loss": 3.456, - "step": 28690 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030734553498891, - "loss": 3.3257, - "step": 28695 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030726329432515865, - "loss": 3.4125, - "step": 28700 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030718105311524475, - "loss": 3.3209, - "step": 28705 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030709881136535246, - "loss": 3.2977, - "step": 28710 - }, - { - "epoch": 0.5, - "learning_rate": 0.000307016569081666, - "loss": 3.4025, - "step": 28715 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003069343262703694, - "loss": 3.2998, - "step": 28720 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030685208293764713, - "loss": 3.3609, - "step": 28725 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030676983908968324, - "loss": 3.3702, - "step": 28730 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030668759473266214, - "loss": 3.3413, - "step": 28735 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030660534987276823, - "loss": 3.3318, - "step": 28740 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030652310451618586, - "loss": 3.3335, - "step": 28745 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030644085866909953, - "loss": 3.1442, - "step": 28750 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030635861233769353, - "loss": 3.4118, - "step": 28755 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030627636552815247, - "loss": 3.3533, - "step": 28760 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003061941182466609, - "loss": 3.3464, - "step": 28765 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003061118704994035, - "loss": 3.3673, - "step": 28770 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003060296222925646, - "loss": 3.2905, - "step": 28775 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003059473736323291, - "loss": 3.3231, - "step": 28780 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003058651245248815, - "loss": 3.3518, - "step": 28785 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030578287497640675, - "loss": 3.2899, - "step": 28790 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003057006249930893, - "loss": 3.4217, - "step": 28795 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003056183745811141, - "loss": 3.3105, - "step": 28800 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030553612374666593, - "loss": 3.3716, - "step": 28805 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030545387249592964, - "loss": 3.3545, - "step": 28810 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030537162083509007, - "loss": 3.2589, - "step": 28815 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003052893687703321, - "loss": 3.3462, - "step": 28820 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030520711630784074, - "loss": 3.3051, - "step": 28825 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030512486345380096, - "loss": 3.2761, - "step": 28830 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003050426102143976, - "loss": 3.2527, - "step": 28835 - }, - { - "epoch": 0.5, - "learning_rate": 0.000304960356595816, - "loss": 3.2873, - "step": 28840 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030487810260424077, - "loss": 3.3043, - "step": 28845 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003047958482458574, - "loss": 3.2753, - "step": 28850 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003047135935268508, - "loss": 3.4139, - "step": 28855 - }, - { - "epoch": 0.5, - "learning_rate": 0.000304631338453406, - "loss": 3.2713, - "step": 28860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030454908303170845, - "loss": 3.2992, - "step": 28865 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003044668272679431, - "loss": 3.3822, - "step": 28870 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030438457116829516, - "loss": 3.3267, - "step": 28875 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030430231473895, - "loss": 3.2699, - "step": 28880 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030422005798609277, - "loss": 3.2666, - "step": 28885 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003041378009159089, - "loss": 3.3945, - "step": 28890 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030405554353458357, - "loss": 3.2789, - "step": 28895 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030397328584830214, - "loss": 3.3467, - "step": 28900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030389102786325, - "loss": 3.3268, - "step": 28905 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003038087695856125, - "loss": 3.3759, - "step": 28910 - }, - { - "epoch": 0.5, - "learning_rate": 0.000303726511021575, - "loss": 3.3726, - "step": 28915 - }, - { - "epoch": 0.5, - "learning_rate": 0.000303644252177323, - "loss": 3.3361, - "step": 28920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030356199305904183, - "loss": 3.381, - "step": 28925 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003034797336729173, - "loss": 3.35, - "step": 28930 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003033974740251343, - "loss": 3.3854, - "step": 28935 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003033152141218788, - "loss": 3.359, - "step": 28940 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003032329539693362, - "loss": 3.2957, - "step": 28945 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003031506935736921, - "loss": 3.2719, - "step": 28950 - }, - { - "epoch": 0.5, - "learning_rate": 0.000303068432941132, - "loss": 3.2425, - "step": 28955 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030298617207784133, - "loss": 3.3014, - "step": 28960 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030290391099000585, - "loss": 3.2075, - "step": 28965 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030282164968381125, - "loss": 3.3241, - "step": 28970 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030273938816544293, - "loss": 3.2996, - "step": 28975 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003026571264410867, - "loss": 3.3114, - "step": 28980 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003025748645169282, - "loss": 3.2326, - "step": 28985 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003024926023991531, - "loss": 3.3887, - "step": 28990 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030241034009394706, - "loss": 3.4096, - "step": 28995 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003023280776074957, - "loss": 3.2992, - "step": 29000 - }, - { - "epoch": 0.5, - "eval_loss": 3.3513615131378174, - "eval_runtime": 150.0716, - "eval_samples_per_second": 12.267, - "eval_steps_per_second": 0.773, - "step": 29000 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030224581494598497, - "loss": 3.2705, - "step": 29005 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030216355211560046, - "loss": 3.1582, - "step": 29010 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003020812891225279, - "loss": 3.3338, - "step": 29015 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030199902597295307, - "loss": 3.355, - "step": 29020 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030191676267306164, - "loss": 3.3007, - "step": 29025 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003018344992290396, - "loss": 3.3631, - "step": 29030 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030175223564707257, - "loss": 3.3573, - "step": 29035 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003016699719333463, - "loss": 3.3362, - "step": 29040 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030158770809404686, - "loss": 3.324, - "step": 29045 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030150544413535987, - "loss": 3.3532, - "step": 29050 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003014231800634711, - "loss": 3.3602, - "step": 29055 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030134091588456663, - "loss": 3.3985, - "step": 29060 - }, - { - "epoch": 0.5, - "learning_rate": 0.000301258651604832, - "loss": 3.3109, - "step": 29065 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030117638723045333, - "loss": 3.234, - "step": 29070 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030109412276761635, - "loss": 3.3347, - "step": 29075 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003010118582225069, - "loss": 3.334, - "step": 29080 - }, - { - "epoch": 0.5, - "learning_rate": 0.000300929593601311, - "loss": 3.5063, - "step": 29085 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003008473289102143, - "loss": 3.2729, - "step": 29090 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003007650641554029, - "loss": 3.3154, - "step": 29095 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030068279934306253, - "loss": 3.2814, - "step": 29100 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030060053447937915, - "loss": 3.2991, - "step": 29105 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030051826957053874, - "loss": 3.2739, - "step": 29110 - }, - { - "epoch": 0.5, - "learning_rate": 0.000300436004622727, - "loss": 3.3853, - "step": 29115 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003003537396421301, - "loss": 3.3941, - "step": 29120 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030027147463493364, - "loss": 3.2138, - "step": 29125 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030018920960732376, - "loss": 3.2438, - "step": 29130 - }, - { - "epoch": 0.5, - "learning_rate": 0.0003001069445654863, - "loss": 3.1981, - "step": 29135 - }, - { - "epoch": 0.5, - "learning_rate": 0.00030002467951560706, - "loss": 3.2727, - "step": 29140 - }, - { - "epoch": 0.5, - "learning_rate": 0.00029994241446387206, - "loss": 3.3465, - "step": 29145 - }, - { - "epoch": 0.5, - "learning_rate": 0.0002998601494164673, - "loss": 3.3148, - "step": 29150 - }, - { - "epoch": 0.5, - "learning_rate": 0.0002997778843795786, - "loss": 3.2841, - "step": 29155 - }, - { - "epoch": 0.5, - "learning_rate": 0.00029969561935939173, - "loss": 3.3489, - "step": 29160 - }, - { - "epoch": 0.5, - "learning_rate": 0.0002996133543620928, - "loss": 3.2372, - "step": 29165 - }, - { - "epoch": 0.5, - "learning_rate": 0.0002995310893938676, - "loss": 3.2644, - "step": 29170 - }, - { - "epoch": 0.5, - "learning_rate": 0.00029944882446090207, - "loss": 3.2412, - "step": 29175 - }, - { - "epoch": 0.5, - "learning_rate": 0.0002993665595693822, - "loss": 3.3525, - "step": 29180 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002992842947254937, - "loss": 3.2611, - "step": 29185 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029920202993542267, - "loss": 3.3687, - "step": 29190 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002991197652053548, - "loss": 3.4074, - "step": 29195 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002990375005414761, - "loss": 3.3301, - "step": 29200 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002989552359499725, - "loss": 3.2824, - "step": 29205 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002988729714370297, - "loss": 3.3212, - "step": 29210 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002987907070088338, - "loss": 3.3241, - "step": 29215 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002987084426715704, - "loss": 3.3126, - "step": 29220 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002986261784314255, - "loss": 3.2104, - "step": 29225 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002985439142945851, - "loss": 3.3995, - "step": 29230 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002984616502672348, - "loss": 3.3807, - "step": 29235 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029837938635556043, - "loss": 3.2622, - "step": 29240 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002982971225657479, - "loss": 3.2761, - "step": 29245 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029821485890398313, - "loss": 3.3754, - "step": 29250 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029813259537645186, - "loss": 3.4256, - "step": 29255 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002980503319893397, - "loss": 3.3391, - "step": 29260 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029796806874883265, - "loss": 3.3717, - "step": 29265 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029788580566111645, - "loss": 3.3585, - "step": 29270 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002978035427323767, - "loss": 3.3437, - "step": 29275 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002977212799687995, - "loss": 3.3646, - "step": 29280 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029763901737657017, - "loss": 3.2832, - "step": 29285 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002975567549618747, - "loss": 3.1794, - "step": 29290 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002974744927308987, - "loss": 3.3764, - "step": 29295 - }, - { - "epoch": 0.51, - "learning_rate": 0.000297392230689828, - "loss": 3.3258, - "step": 29300 - }, - { - "epoch": 0.51, - "learning_rate": 0.000297309968844848, - "loss": 3.3501, - "step": 29305 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002972277072021448, - "loss": 3.3172, - "step": 29310 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002971454457679037, - "loss": 3.3571, - "step": 29315 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002970631845483104, - "loss": 3.2708, - "step": 29320 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002969809235495506, - "loss": 3.4065, - "step": 29325 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029689866277780993, - "loss": 3.2756, - "step": 29330 - }, - { - "epoch": 0.51, - "learning_rate": 0.000296816402239274, - "loss": 3.3239, - "step": 29335 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029673414194012816, - "loss": 3.284, - "step": 29340 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029665188188655807, - "loss": 3.3406, - "step": 29345 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002965696220847494, - "loss": 3.338, - "step": 29350 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002964873625408875, - "loss": 3.3738, - "step": 29355 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002964051032611581, - "loss": 3.1847, - "step": 29360 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002963228442517463, - "loss": 3.3021, - "step": 29365 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002962405855188378, - "loss": 3.3367, - "step": 29370 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029615832706861815, - "loss": 3.2833, - "step": 29375 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029607606890727246, - "loss": 3.2644, - "step": 29380 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002959938110409864, - "loss": 3.4083, - "step": 29385 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002959115534759452, - "loss": 3.3432, - "step": 29390 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002958292962183342, - "loss": 3.2759, - "step": 29395 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002957470392743387, - "loss": 3.2871, - "step": 29400 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029566478265014403, - "loss": 3.2679, - "step": 29405 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029558252635193563, - "loss": 3.3223, - "step": 29410 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002955002703858986, - "loss": 3.2751, - "step": 29415 - }, - { - "epoch": 0.51, - "learning_rate": 0.000295418014758218, - "loss": 3.3319, - "step": 29420 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029533575947507927, - "loss": 3.3044, - "step": 29425 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002952535045426676, - "loss": 3.2715, - "step": 29430 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029517124996716816, - "loss": 3.147, - "step": 29435 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002950889957547658, - "loss": 3.3518, - "step": 29440 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002950067419116457, - "loss": 3.2539, - "step": 29445 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029492448844399327, - "loss": 3.26, - "step": 29450 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002948422353579931, - "loss": 3.3907, - "step": 29455 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002947599826598306, - "loss": 3.299, - "step": 29460 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002946777303556904, - "loss": 3.3628, - "step": 29465 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002945954784517575, - "loss": 3.2462, - "step": 29470 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029451322695421713, - "loss": 3.3751, - "step": 29475 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002944309758692538, - "loss": 3.2978, - "step": 29480 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029434872520305277, - "loss": 3.3679, - "step": 29485 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029426647496179847, - "loss": 3.2476, - "step": 29490 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029418422515167596, - "loss": 3.3317, - "step": 29495 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002941019757788698, - "loss": 3.3763, - "step": 29500 - }, - { - "epoch": 0.51, - "eval_loss": 3.340322494506836, - "eval_runtime": 149.8765, - "eval_samples_per_second": 12.283, - "eval_steps_per_second": 0.774, - "step": 29500 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002940197268495649, - "loss": 3.2741, - "step": 29505 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029393747836994595, - "loss": 3.2172, - "step": 29510 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002938552303461975, - "loss": 3.282, - "step": 29515 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002937729827845042, - "loss": 3.3107, - "step": 29520 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029369073569105066, - "loss": 3.1937, - "step": 29525 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029360848907202154, - "loss": 3.3407, - "step": 29530 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002935262429336013, - "loss": 3.3098, - "step": 29535 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002934439972819744, - "loss": 3.4239, - "step": 29540 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002933617521233252, - "loss": 3.2941, - "step": 29545 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002932795074638383, - "loss": 3.1586, - "step": 29550 - }, - { - "epoch": 0.51, - "learning_rate": 0.000293197263309698, - "loss": 3.3078, - "step": 29555 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002931150196670886, - "loss": 3.303, - "step": 29560 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029303277654219457, - "loss": 3.1798, - "step": 29565 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002929505339411999, - "loss": 3.3173, - "step": 29570 - }, - { - "epoch": 0.51, - "learning_rate": 0.000292868291870289, - "loss": 3.4086, - "step": 29575 - }, - { - "epoch": 0.51, - "learning_rate": 0.000292786050335646, - "loss": 3.4117, - "step": 29580 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029270380934345495, - "loss": 3.1563, - "step": 29585 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029262156889990024, - "loss": 3.3599, - "step": 29590 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002925393290111656, - "loss": 3.3668, - "step": 29595 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029245708968343514, - "loss": 3.3687, - "step": 29600 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029237485092289294, - "loss": 3.2999, - "step": 29605 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002922926127357229, - "loss": 3.2961, - "step": 29610 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029221037512810886, - "loss": 3.2225, - "step": 29615 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029212813810623473, - "loss": 3.2368, - "step": 29620 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002920459016762842, - "loss": 3.2402, - "step": 29625 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002919636658444411, - "loss": 3.3306, - "step": 29630 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029188143061688917, - "loss": 3.2542, - "step": 29635 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029179919599981217, - "loss": 3.3902, - "step": 29640 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002917169619993934, - "loss": 3.1883, - "step": 29645 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002916347286218167, - "loss": 3.3464, - "step": 29650 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002915524958732656, - "loss": 3.254, - "step": 29655 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002914702637599234, - "loss": 3.3093, - "step": 29660 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029138803228797384, - "loss": 3.3814, - "step": 29665 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029130580146359997, - "loss": 3.3697, - "step": 29670 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029122357129298537, - "loss": 3.3207, - "step": 29675 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002911413417823131, - "loss": 3.3784, - "step": 29680 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029105911293776654, - "loss": 3.326, - "step": 29685 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029097688476552904, - "loss": 3.4872, - "step": 29690 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002908946572717834, - "loss": 3.2418, - "step": 29695 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002908124304627129, - "loss": 3.383, - "step": 29700 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002907302043445005, - "loss": 3.4032, - "step": 29705 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002906479789233293, - "loss": 3.2928, - "step": 29710 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002905657542053822, - "loss": 3.2659, - "step": 29715 - }, - { - "epoch": 0.51, - "learning_rate": 0.000290483530196842, - "loss": 3.3656, - "step": 29720 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002904013069038915, - "loss": 3.3738, - "step": 29725 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029031908433271354, - "loss": 3.3268, - "step": 29730 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029023686248949083, - "loss": 3.2683, - "step": 29735 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029015464138040613, - "loss": 3.447, - "step": 29740 - }, - { - "epoch": 0.51, - "learning_rate": 0.00029007242101164183, - "loss": 3.2916, - "step": 29745 - }, - { - "epoch": 0.51, - "learning_rate": 0.0002899902013893806, - "loss": 3.2699, - "step": 29750 - }, - { - "epoch": 0.51, - "learning_rate": 0.00028990798251980494, - "loss": 3.319, - "step": 29755 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028982576440909723, - "loss": 3.2191, - "step": 29760 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028974354706344003, - "loss": 3.3557, - "step": 29765 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002896613304890154, - "loss": 3.3007, - "step": 29770 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028957911469200577, - "loss": 3.3596, - "step": 29775 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028949689967859327, - "loss": 3.3322, - "step": 29780 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002894146854549601, - "loss": 3.4363, - "step": 29785 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002893324720272884, - "loss": 3.3202, - "step": 29790 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002892502594017601, - "loss": 3.3451, - "step": 29795 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002891680475845572, - "loss": 3.3622, - "step": 29800 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002890858365818616, - "loss": 3.2766, - "step": 29805 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002890036263998551, - "loss": 3.348, - "step": 29810 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002889214170447196, - "loss": 3.3661, - "step": 29815 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028883920852263686, - "loss": 3.275, - "step": 29820 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002887570008397883, - "loss": 3.3184, - "step": 29825 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028867479400235567, - "loss": 3.154, - "step": 29830 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002885925880165205, - "loss": 3.2861, - "step": 29835 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002885103828884642, - "loss": 3.2612, - "step": 29840 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002884281786243684, - "loss": 3.3055, - "step": 29845 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028834597523041413, - "loss": 3.3218, - "step": 29850 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028826377271278284, - "loss": 3.3651, - "step": 29855 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028818157107765563, - "loss": 3.3722, - "step": 29860 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002880993703312137, - "loss": 3.3252, - "step": 29865 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002880171704796383, - "loss": 3.2013, - "step": 29870 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002879349715291101, - "loss": 3.3523, - "step": 29875 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028785277348581024, - "loss": 3.2081, - "step": 29880 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002877705763559196, - "loss": 3.3849, - "step": 29885 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028768838014561884, - "loss": 3.3127, - "step": 29890 - }, - { - "epoch": 0.52, - "learning_rate": 0.000287606184861089, - "loss": 3.4062, - "step": 29895 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002875239905085104, - "loss": 3.2684, - "step": 29900 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002874417970940638, - "loss": 3.4488, - "step": 29905 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028735960462392964, - "loss": 3.3177, - "step": 29910 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002872774131042885, - "loss": 3.2003, - "step": 29915 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002871952225413208, - "loss": 3.2894, - "step": 29920 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028711303294120654, - "loss": 3.2948, - "step": 29925 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002870308443101262, - "loss": 3.2572, - "step": 29930 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028694865665425997, - "loss": 3.309, - "step": 29935 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028686646997978777, - "loss": 3.3262, - "step": 29940 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002867842842928899, - "loss": 3.3213, - "step": 29945 - }, - { - "epoch": 0.52, - "learning_rate": 0.000286702099599746, - "loss": 3.1783, - "step": 29950 - }, - { - "epoch": 0.52, - "learning_rate": 0.000286619915906536, - "loss": 3.3147, - "step": 29955 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002865377332194399, - "loss": 3.2999, - "step": 29960 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002864555515446371, - "loss": 3.1541, - "step": 29965 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002863733708883076, - "loss": 3.2152, - "step": 29970 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028629119125663064, - "loss": 3.2402, - "step": 29975 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002862090126557859, - "loss": 3.4334, - "step": 29980 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028612683509195267, - "loss": 3.2809, - "step": 29985 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028604465857131033, - "loss": 3.4323, - "step": 29990 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002859624831000383, - "loss": 3.3852, - "step": 29995 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002858803086843155, - "loss": 3.3623, - "step": 30000 - }, - { - "epoch": 0.52, - "eval_loss": 3.3538341522216797, - "eval_runtime": 149.973, - "eval_samples_per_second": 12.276, - "eval_steps_per_second": 0.773, - "step": 30000 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002857981353303211, - "loss": 3.3106, - "step": 30005 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002857159630442341, - "loss": 3.4119, - "step": 30010 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028563379183223355, - "loss": 3.3264, - "step": 30015 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002855516217004982, - "loss": 3.2663, - "step": 30020 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028546945265520694, - "loss": 3.349, - "step": 30025 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028538728470253825, - "loss": 3.3401, - "step": 30030 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002853051178486709, - "loss": 3.4039, - "step": 30035 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028522295209978336, - "loss": 3.3008, - "step": 30040 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002851407874620541, - "loss": 3.356, - "step": 30045 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028505862394166156, - "loss": 3.1896, - "step": 30050 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028497646154478383, - "loss": 3.3458, - "step": 30055 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002848943002775992, - "loss": 3.3157, - "step": 30060 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002848121401462858, - "loss": 3.3277, - "step": 30065 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028472998115702157, - "loss": 3.4176, - "step": 30070 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028464782331598463, - "loss": 3.3079, - "step": 30075 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028456566662935265, - "loss": 3.2774, - "step": 30080 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002844835111033034, - "loss": 3.3172, - "step": 30085 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028440135674401453, - "loss": 3.1986, - "step": 30090 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002843192035576638, - "loss": 3.3215, - "step": 30095 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002842370515504286, - "loss": 3.3483, - "step": 30100 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028415490072848627, - "loss": 3.3005, - "step": 30105 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002840727510980142, - "loss": 3.2421, - "step": 30110 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002839906026651897, - "loss": 3.2194, - "step": 30115 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002839084554361897, - "loss": 3.3219, - "step": 30120 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002838263094171916, - "loss": 3.2303, - "step": 30125 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028374416461437194, - "loss": 3.226, - "step": 30130 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028366202103390786, - "loss": 3.2418, - "step": 30135 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002835798786819761, - "loss": 3.1874, - "step": 30140 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002834977375647532, - "loss": 3.3188, - "step": 30145 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002834155976884161, - "loss": 3.1829, - "step": 30150 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002833334590591408, - "loss": 3.3849, - "step": 30155 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028325132168310413, - "loss": 3.3192, - "step": 30160 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002831691855664821, - "loss": 3.3352, - "step": 30165 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002830870507154511, - "loss": 3.3424, - "step": 30170 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002830049171361873, - "loss": 3.23, - "step": 30175 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028292278483486665, - "loss": 3.257, - "step": 30180 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002828406538176649, - "loss": 3.3752, - "step": 30185 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002827585240907581, - "loss": 3.2752, - "step": 30190 - }, - { - "epoch": 0.52, - "learning_rate": 0.000282676395660322, - "loss": 3.2853, - "step": 30195 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028259426853253215, - "loss": 3.3301, - "step": 30200 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028251214271356406, - "loss": 3.258, - "step": 30205 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002824300182095932, - "loss": 3.2661, - "step": 30210 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028234789502679493, - "loss": 3.3113, - "step": 30215 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002822657731713445, - "loss": 3.327, - "step": 30220 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002821836526494171, - "loss": 3.2908, - "step": 30225 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002821015334671877, - "loss": 3.3357, - "step": 30230 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028201941563083113, - "loss": 3.394, - "step": 30235 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002819372991465225, - "loss": 3.3686, - "step": 30240 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002818551840204363, - "loss": 3.4033, - "step": 30245 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028177307025874725, - "loss": 3.3786, - "step": 30250 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028169095786763007, - "loss": 3.2397, - "step": 30255 - }, - { - "epoch": 0.52, - "learning_rate": 0.000281608846853259, - "loss": 3.3121, - "step": 30260 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002815267372218083, - "loss": 3.2628, - "step": 30265 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028144462897945223, - "loss": 3.3744, - "step": 30270 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002813625221323651, - "loss": 3.2426, - "step": 30275 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002812804166867208, - "loss": 3.4355, - "step": 30280 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002811983126486931, - "loss": 3.245, - "step": 30285 - }, - { - "epoch": 0.52, - "learning_rate": 0.000281116210024456, - "loss": 3.3295, - "step": 30290 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002810341088201831, - "loss": 3.3174, - "step": 30295 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028095200904204804, - "loss": 3.1724, - "step": 30300 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002808699106962244, - "loss": 3.2641, - "step": 30305 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028078781378888524, - "loss": 3.2328, - "step": 30310 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028070571832620404, - "loss": 3.2577, - "step": 30315 - }, - { - "epoch": 0.52, - "learning_rate": 0.00028062362431435404, - "loss": 3.3785, - "step": 30320 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002805415317595081, - "loss": 3.2063, - "step": 30325 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002804594406678394, - "loss": 3.3733, - "step": 30330 - }, - { - "epoch": 0.52, - "learning_rate": 0.0002803773510455205, - "loss": 3.3888, - "step": 30335 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002802952628987243, - "loss": 3.2477, - "step": 30340 - }, - { - "epoch": 0.53, - "learning_rate": 0.00028021317623362326, - "loss": 3.3568, - "step": 30345 - }, - { - "epoch": 0.53, - "learning_rate": 0.00028013109105638995, - "loss": 3.284, - "step": 30350 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002800490073731969, - "loss": 3.3265, - "step": 30355 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002799669251902162, - "loss": 3.242, - "step": 30360 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027988484451361994, - "loss": 3.391, - "step": 30365 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027980276534958037, - "loss": 3.3313, - "step": 30370 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027972068770426936, - "loss": 3.2538, - "step": 30375 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002796386115838587, - "loss": 3.3219, - "step": 30380 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027955653699452006, - "loss": 3.3282, - "step": 30385 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002794744639424251, - "loss": 3.3222, - "step": 30390 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027939239243374517, - "loss": 3.3605, - "step": 30395 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002793103224746518, - "loss": 3.296, - "step": 30400 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027922825407131625, - "loss": 3.3659, - "step": 30405 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002791461872299094, - "loss": 3.4053, - "step": 30410 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027906412195660235, - "loss": 3.3414, - "step": 30415 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027898205825756614, - "loss": 3.2646, - "step": 30420 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027889999613897137, - "loss": 3.2449, - "step": 30425 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027881793560698885, - "loss": 3.3722, - "step": 30430 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027873587666778893, - "loss": 3.2959, - "step": 30435 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027865381932754214, - "loss": 3.2453, - "step": 30440 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002785717635924187, - "loss": 3.305, - "step": 30445 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027848970946858884, - "loss": 3.337, - "step": 30450 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002784076569622227, - "loss": 3.2714, - "step": 30455 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027832560607948993, - "loss": 3.3498, - "step": 30460 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027824355682656057, - "loss": 3.2591, - "step": 30465 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002781615092096042, - "loss": 3.3538, - "step": 30470 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002780794632347904, - "loss": 3.3029, - "step": 30475 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027799741890828874, - "loss": 3.4062, - "step": 30480 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027791537623626843, - "loss": 3.2511, - "step": 30485 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002778333352248985, - "loss": 3.2801, - "step": 30490 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027775129588034823, - "loss": 3.3308, - "step": 30495 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027766925820878647, - "loss": 3.3449, - "step": 30500 - }, - { - "epoch": 0.53, - "eval_loss": 3.326901435852051, - "eval_runtime": 149.9761, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 30500 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027758722221638205, - "loss": 3.2604, - "step": 30505 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002775051879093038, - "loss": 3.3171, - "step": 30510 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027742315529371994, - "loss": 3.3948, - "step": 30515 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027734112437579917, - "loss": 3.3968, - "step": 30520 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027725909516170964, - "loss": 3.2516, - "step": 30525 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027717706765761967, - "loss": 3.323, - "step": 30530 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027709504186969737, - "loss": 3.3059, - "step": 30535 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027701301780411045, - "loss": 3.2289, - "step": 30540 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002769309954670267, - "loss": 3.3094, - "step": 30545 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002768489748646139, - "loss": 3.3621, - "step": 30550 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027676695600303955, - "loss": 3.409, - "step": 30555 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002766849388884711, - "loss": 3.3604, - "step": 30560 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027660292352707576, - "loss": 3.2526, - "step": 30565 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002765209099250206, - "loss": 3.2604, - "step": 30570 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002764388980884726, - "loss": 3.269, - "step": 30575 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002763568880235989, - "loss": 3.3991, - "step": 30580 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027627487973656606, - "loss": 3.368, - "step": 30585 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002761928732335405, - "loss": 3.2534, - "step": 30590 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002761108685206889, - "loss": 3.2685, - "step": 30595 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027602886560417763, - "loss": 3.3091, - "step": 30600 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027594686449017276, - "loss": 3.3925, - "step": 30605 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027586486518484056, - "loss": 3.3245, - "step": 30610 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027578286769434663, - "loss": 3.3573, - "step": 30615 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027570087202485703, - "loss": 3.3753, - "step": 30620 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027561887818253726, - "loss": 3.287, - "step": 30625 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002755368861735529, - "loss": 3.3379, - "step": 30630 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002754548960040694, - "loss": 3.2588, - "step": 30635 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002753729076802519, - "loss": 3.2747, - "step": 30640 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002752909212082655, - "loss": 3.2554, - "step": 30645 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027520893659427516, - "loss": 3.3417, - "step": 30650 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027512695384444575, - "loss": 3.2509, - "step": 30655 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027504497296494207, - "loss": 3.2155, - "step": 30660 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027496299396192846, - "loss": 3.3851, - "step": 30665 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002748810168415693, - "loss": 3.2289, - "step": 30670 - }, - { - "epoch": 0.53, - "learning_rate": 0.000274799041610029, - "loss": 3.3077, - "step": 30675 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002747170682734716, - "loss": 3.2613, - "step": 30680 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002746350968380613, - "loss": 3.3562, - "step": 30685 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002745531273099615, - "loss": 3.2328, - "step": 30690 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002744711596953361, - "loss": 3.3524, - "step": 30695 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002743891940003487, - "loss": 3.3606, - "step": 30700 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027430723023116264, - "loss": 3.2986, - "step": 30705 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002742252683939412, - "loss": 3.2556, - "step": 30710 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002741433084948476, - "loss": 3.28, - "step": 30715 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027406135054004456, - "loss": 3.2094, - "step": 30720 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027397939453569506, - "loss": 3.393, - "step": 30725 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002738974404879617, - "loss": 3.2284, - "step": 30730 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027381548840300706, - "loss": 3.2894, - "step": 30735 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002737335382869936, - "loss": 3.3592, - "step": 30740 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002736515901460834, - "loss": 3.2587, - "step": 30745 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027356964398643854, - "loss": 3.2823, - "step": 30750 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027348769981422094, - "loss": 3.3475, - "step": 30755 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027340575763559256, - "loss": 3.3608, - "step": 30760 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002733238174567149, - "loss": 3.3273, - "step": 30765 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002732418792837494, - "loss": 3.241, - "step": 30770 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027315994312285746, - "loss": 3.293, - "step": 30775 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027307800898020027, - "loss": 3.1938, - "step": 30780 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027299607686193876, - "loss": 3.3112, - "step": 30785 - }, - { - "epoch": 0.53, - "learning_rate": 0.000272914146774234, - "loss": 3.3405, - "step": 30790 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002728322187232465, - "loss": 3.2946, - "step": 30795 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027275029271513695, - "loss": 3.3734, - "step": 30800 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027266836875606567, - "loss": 3.3478, - "step": 30805 - }, - { - "epoch": 0.53, - "learning_rate": 0.000272586446852193, - "loss": 3.2108, - "step": 30810 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002725045270096792, - "loss": 3.2706, - "step": 30815 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002724226092346839, - "loss": 3.3201, - "step": 30820 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002723406935333671, - "loss": 3.2457, - "step": 30825 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002722587799118883, - "loss": 3.2723, - "step": 30830 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027217686837640714, - "loss": 3.3165, - "step": 30835 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027209495893308296, - "loss": 3.2665, - "step": 30840 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027201305158807483, - "loss": 3.3003, - "step": 30845 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002719311463475418, - "loss": 3.2617, - "step": 30850 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002718492432176426, - "loss": 3.3741, - "step": 30855 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002717673422045361, - "loss": 3.4239, - "step": 30860 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002716854433143809, - "loss": 3.3527, - "step": 30865 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002716035465533351, - "loss": 3.3153, - "step": 30870 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027152165192755707, - "loss": 3.2969, - "step": 30875 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027143975944320494, - "loss": 3.3132, - "step": 30880 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027135786910643646, - "loss": 3.238, - "step": 30885 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002712759809234096, - "loss": 3.3268, - "step": 30890 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002711940949002816, - "loss": 3.3004, - "step": 30895 - }, - { - "epoch": 0.53, - "learning_rate": 0.00027111221104321004, - "loss": 3.3266, - "step": 30900 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002710303293583522, - "loss": 3.3325, - "step": 30905 - }, - { - "epoch": 0.53, - "learning_rate": 0.0002709484498518651, - "loss": 3.3649, - "step": 30910 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002708665725299058, - "loss": 3.3601, - "step": 30915 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002707846973986308, - "loss": 3.3293, - "step": 30920 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002707028244641969, - "loss": 3.2142, - "step": 30925 - }, - { - "epoch": 0.54, - "learning_rate": 0.00027062095373276036, - "loss": 3.1972, - "step": 30930 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002705390852104776, - "loss": 3.2593, - "step": 30935 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002704572189035047, - "loss": 3.3917, - "step": 30940 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002703753548179976, - "loss": 3.2728, - "step": 30945 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002702934929601118, - "loss": 3.4312, - "step": 30950 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002702116333360031, - "loss": 3.3181, - "step": 30955 - }, - { - "epoch": 0.54, - "learning_rate": 0.000270129775951827, - "loss": 3.3649, - "step": 30960 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002700479208137386, - "loss": 3.3491, - "step": 30965 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002699660679278932, - "loss": 3.194, - "step": 30970 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026988421730044545, - "loss": 3.3569, - "step": 30975 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002698023689375502, - "loss": 3.1581, - "step": 30980 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002697205228453621, - "loss": 3.2575, - "step": 30985 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026963867903003543, - "loss": 3.2312, - "step": 30990 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026955683749772466, - "loss": 3.3679, - "step": 30995 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002694749982545835, - "loss": 3.2296, - "step": 31000 - }, - { - "epoch": 0.54, - "eval_loss": 3.32407808303833, - "eval_runtime": 149.9754, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 31000 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026939316130676616, - "loss": 3.2916, - "step": 31005 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026931132666042614, - "loss": 3.316, - "step": 31010 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002692294943217171, - "loss": 3.183, - "step": 31015 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002691476642967925, - "loss": 3.3019, - "step": 31020 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002690658365918054, - "loss": 3.1547, - "step": 31025 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026898401121290875, - "loss": 3.3068, - "step": 31030 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002689021881662555, - "loss": 3.2116, - "step": 31035 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002688203674579984, - "loss": 3.2976, - "step": 31040 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002687385490942899, - "loss": 3.3166, - "step": 31045 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026865673308128224, - "loss": 3.2547, - "step": 31050 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002685749194251276, - "loss": 3.3457, - "step": 31055 - }, - { - "epoch": 0.54, - "learning_rate": 0.000268493108131978, - "loss": 3.2873, - "step": 31060 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026841129920798513, - "loss": 3.3012, - "step": 31065 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026832949265930087, - "loss": 3.1554, - "step": 31070 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002682476884920763, - "loss": 3.4911, - "step": 31075 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002681658867124628, - "loss": 3.2928, - "step": 31080 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002680840873266115, - "loss": 3.0558, - "step": 31085 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026800229034067326, - "loss": 3.2551, - "step": 31090 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002679204957607989, - "loss": 3.2682, - "step": 31095 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002678387035931387, - "loss": 3.3801, - "step": 31100 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002677569138438433, - "loss": 3.195, - "step": 31105 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026767512651906263, - "loss": 3.2651, - "step": 31110 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002675933416249468, - "loss": 3.3951, - "step": 31115 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026751155916764573, - "loss": 3.3247, - "step": 31120 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026742977915330884, - "loss": 3.3234, - "step": 31125 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026734800158808555, - "loss": 3.3103, - "step": 31130 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026726622647812526, - "loss": 3.3172, - "step": 31135 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026718445382957695, - "loss": 3.4098, - "step": 31140 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026710268364858963, - "loss": 3.3378, - "step": 31145 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026702091594131187, - "loss": 3.3174, - "step": 31150 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026693915071389216, - "loss": 3.2108, - "step": 31155 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026685738797247886, - "loss": 3.3841, - "step": 31160 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026677562772322016, - "loss": 3.4254, - "step": 31165 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002666938699722641, - "loss": 3.3229, - "step": 31170 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002666121147257582, - "loss": 3.1803, - "step": 31175 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026653036198985016, - "loss": 3.2278, - "step": 31180 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026644861177068746, - "loss": 3.2767, - "step": 31185 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002663668640744171, - "loss": 3.3793, - "step": 31190 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002662851189071862, - "loss": 3.2449, - "step": 31195 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026620337627514165, - "loss": 3.3237, - "step": 31200 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026612163618443003, - "loss": 3.3144, - "step": 31205 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002660398986411976, - "loss": 3.3152, - "step": 31210 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002659581636515908, - "loss": 3.2679, - "step": 31215 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002658764312217557, - "loss": 3.2625, - "step": 31220 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002657947013578381, - "loss": 3.2968, - "step": 31225 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002657129740659836, - "loss": 3.2104, - "step": 31230 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002656312493523377, - "loss": 3.331, - "step": 31235 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002655495272230457, - "loss": 3.2342, - "step": 31240 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002654678076842528, - "loss": 3.2085, - "step": 31245 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002653860907421038, - "loss": 3.311, - "step": 31250 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002653043764027433, - "loss": 3.2037, - "step": 31255 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002652226646723159, - "loss": 3.1527, - "step": 31260 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002651409555569659, - "loss": 3.271, - "step": 31265 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002650592490628373, - "loss": 3.3012, - "step": 31270 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026497754519607433, - "loss": 3.3312, - "step": 31275 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002648958439628202, - "loss": 3.3229, - "step": 31280 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026481414536921893, - "loss": 3.2941, - "step": 31285 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026473244942141344, - "loss": 3.2946, - "step": 31290 - }, - { - "epoch": 0.54, - "learning_rate": 0.000264650756125547, - "loss": 3.3225, - "step": 31295 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002645690654877627, - "loss": 3.2401, - "step": 31300 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026448737751420305, - "loss": 3.328, - "step": 31305 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002644056922110105, - "loss": 3.3317, - "step": 31310 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002643240095843275, - "loss": 3.2121, - "step": 31315 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002642423296402962, - "loss": 3.2404, - "step": 31320 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026416065238505846, - "loss": 3.2064, - "step": 31325 - }, - { - "epoch": 0.54, - "learning_rate": 0.000264078977824756, - "loss": 3.261, - "step": 31330 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026399730596553023, - "loss": 3.174, - "step": 31335 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002639156368135226, - "loss": 3.3145, - "step": 31340 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026383397037487414, - "loss": 3.4055, - "step": 31345 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026375230665572584, - "loss": 3.2682, - "step": 31350 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002636706456622182, - "loss": 3.3942, - "step": 31355 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002635889874004918, - "loss": 3.1798, - "step": 31360 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026350733187668706, - "loss": 3.2755, - "step": 31365 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026342567909694385, - "loss": 3.1718, - "step": 31370 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026334402906740223, - "loss": 3.1946, - "step": 31375 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026326238179420167, - "loss": 3.2054, - "step": 31380 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002631807372834818, - "loss": 3.257, - "step": 31385 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002630990955413817, - "loss": 3.3149, - "step": 31390 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026301745657404045, - "loss": 3.3137, - "step": 31395 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026293582038759716, - "loss": 3.3703, - "step": 31400 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026285418698819004, - "loss": 3.2994, - "step": 31405 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026277255638195774, - "loss": 3.2856, - "step": 31410 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026269092857503836, - "loss": 3.3122, - "step": 31415 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026260930357357, - "loss": 3.2749, - "step": 31420 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026252768138369044, - "loss": 3.161, - "step": 31425 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002624460620115372, - "loss": 3.2431, - "step": 31430 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002623644454632475, - "loss": 3.2436, - "step": 31435 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002622828317449587, - "loss": 3.3021, - "step": 31440 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026220122086280764, - "loss": 3.2542, - "step": 31445 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002621196128229311, - "loss": 3.2806, - "step": 31450 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002620380076314656, - "loss": 3.2708, - "step": 31455 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026195640529454726, - "loss": 3.3919, - "step": 31460 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002618748058183123, - "loss": 3.2865, - "step": 31465 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002617932092088965, - "loss": 3.2749, - "step": 31470 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002617116154724356, - "loss": 3.2656, - "step": 31475 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026163002461506503, - "loss": 3.212, - "step": 31480 - }, - { - "epoch": 0.54, - "learning_rate": 0.00026154843664291997, - "loss": 3.2677, - "step": 31485 - }, - { - "epoch": 0.54, - "learning_rate": 0.0002614668515621354, - "loss": 3.2452, - "step": 31490 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026138526937884604, - "loss": 3.2805, - "step": 31495 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002613036900991865, - "loss": 3.3033, - "step": 31500 - }, - { - "epoch": 0.55, - "eval_loss": 3.316983938217163, - "eval_runtime": 150.2836, - "eval_samples_per_second": 12.25, - "eval_steps_per_second": 0.772, - "step": 31500 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026122211372929137, - "loss": 3.3315, - "step": 31505 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026114054027529443, - "loss": 3.3347, - "step": 31510 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002610589697433297, - "loss": 3.3488, - "step": 31515 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002609774021395308, - "loss": 3.3674, - "step": 31520 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026089583747003134, - "loss": 3.1443, - "step": 31525 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002608142757409646, - "loss": 3.3235, - "step": 31530 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026073271695846337, - "loss": 3.2616, - "step": 31535 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002606511611286606, - "loss": 3.328, - "step": 31540 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002605696082576889, - "loss": 3.339, - "step": 31545 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002604880583516805, - "loss": 3.2483, - "step": 31550 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026040651141676774, - "loss": 3.3018, - "step": 31555 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002603249674590823, - "loss": 3.3246, - "step": 31560 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026024342648475594, - "loss": 3.4165, - "step": 31565 - }, - { - "epoch": 0.55, - "learning_rate": 0.00026016188849992025, - "loss": 3.2257, - "step": 31570 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002600803535107063, - "loss": 3.2111, - "step": 31575 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002599988215232453, - "loss": 3.2358, - "step": 31580 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025991729254366786, - "loss": 3.3897, - "step": 31585 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025983576657810463, - "loss": 3.2663, - "step": 31590 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002597542436326858, - "loss": 3.2689, - "step": 31595 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025967272371354164, - "loss": 3.2168, - "step": 31600 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025959120682680215, - "loss": 3.2391, - "step": 31605 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025950969297859665, - "loss": 3.2852, - "step": 31610 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025942818217505474, - "loss": 3.3367, - "step": 31615 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002593466744223056, - "loss": 3.283, - "step": 31620 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025926516972647827, - "loss": 3.3326, - "step": 31625 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002591836680937014, - "loss": 3.3459, - "step": 31630 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025910216953010343, - "loss": 3.4016, - "step": 31635 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025902067404181276, - "loss": 3.2182, - "step": 31640 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002589391816349574, - "loss": 3.2924, - "step": 31645 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002588576923156651, - "loss": 3.3283, - "step": 31650 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002587762060900634, - "loss": 3.346, - "step": 31655 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025869472296428, - "loss": 3.1884, - "step": 31660 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025861324294444157, - "loss": 3.2707, - "step": 31665 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002585317660366752, - "loss": 3.2709, - "step": 31670 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025845029224710745, - "loss": 3.3347, - "step": 31675 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002583688215818648, - "loss": 3.268, - "step": 31680 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025828735404707356, - "loss": 3.2481, - "step": 31685 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002582058896488594, - "loss": 3.3265, - "step": 31690 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002581244283933481, - "loss": 3.2392, - "step": 31695 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002580429702866652, - "loss": 3.3131, - "step": 31700 - }, - { - "epoch": 0.55, - "learning_rate": 0.000257961515334936, - "loss": 3.2571, - "step": 31705 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002578800635442854, - "loss": 3.3224, - "step": 31710 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025779861492083805, - "loss": 3.3081, - "step": 31715 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002577171694707186, - "loss": 3.3205, - "step": 31720 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025763572720005135, - "loss": 3.1753, - "step": 31725 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025755428811496023, - "loss": 3.2786, - "step": 31730 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002574728522215693, - "loss": 3.1329, - "step": 31735 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025739141952600176, - "loss": 3.1854, - "step": 31740 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002573099900343811, - "loss": 3.2668, - "step": 31745 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002572285637528305, - "loss": 3.3433, - "step": 31750 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025714714068747263, - "loss": 3.4145, - "step": 31755 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025706572084443027, - "loss": 3.4166, - "step": 31760 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025698430422982555, - "loss": 3.2479, - "step": 31765 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025690289084978076, - "loss": 3.2204, - "step": 31770 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025682148071041767, - "loss": 3.1905, - "step": 31775 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002567400738178579, - "loss": 3.3027, - "step": 31780 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002566586701782231, - "loss": 3.3398, - "step": 31785 - }, - { - "epoch": 0.55, - "learning_rate": 0.000256577269797634, - "loss": 3.3163, - "step": 31790 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002564958726822117, - "loss": 3.2045, - "step": 31795 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002564144788380767, - "loss": 3.313, - "step": 31800 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025633308827134973, - "loss": 3.2722, - "step": 31805 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025625170098815065, - "loss": 3.3247, - "step": 31810 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025617031699459947, - "loss": 3.3358, - "step": 31815 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025608893629681576, - "loss": 3.0963, - "step": 31820 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025600755890091897, - "loss": 3.2378, - "step": 31825 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002559261848130284, - "loss": 3.2526, - "step": 31830 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002558448140392629, - "loss": 3.3501, - "step": 31835 - }, - { - "epoch": 0.55, - "learning_rate": 0.000255763446585741, - "loss": 3.1792, - "step": 31840 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025568208245858116, - "loss": 3.27, - "step": 31845 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025560072166390163, - "loss": 3.1158, - "step": 31850 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002555193642078203, - "loss": 3.289, - "step": 31855 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002554380100964549, - "loss": 3.2798, - "step": 31860 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025535665933592265, - "loss": 3.3566, - "step": 31865 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002552753119323409, - "loss": 3.3437, - "step": 31870 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025519396789182644, - "loss": 3.2963, - "step": 31875 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002551126272204959, - "loss": 3.3405, - "step": 31880 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025503128992446586, - "loss": 3.2588, - "step": 31885 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002549499560098524, - "loss": 3.2521, - "step": 31890 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025486862548277126, - "loss": 3.2144, - "step": 31895 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002547872983493382, - "loss": 3.3133, - "step": 31900 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025470597461566864, - "loss": 3.2138, - "step": 31905 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025462465428787763, - "loss": 3.2776, - "step": 31910 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002545433373720801, - "loss": 3.2242, - "step": 31915 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002544620238743906, - "loss": 3.2321, - "step": 31920 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025438071380092355, - "loss": 3.2964, - "step": 31925 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025429940715779304, - "loss": 3.2827, - "step": 31930 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002542181039511128, - "loss": 3.265, - "step": 31935 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025413680418699675, - "loss": 3.2511, - "step": 31940 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025405550787155774, - "loss": 3.3294, - "step": 31945 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002539742150109092, - "loss": 3.3519, - "step": 31950 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002538929256111637, - "loss": 3.3194, - "step": 31955 - }, - { - "epoch": 0.55, - "learning_rate": 0.000253811639678434, - "loss": 3.3208, - "step": 31960 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025373035721883234, - "loss": 3.2799, - "step": 31965 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002536490782384706, - "loss": 3.3969, - "step": 31970 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025356780274346063, - "loss": 3.261, - "step": 31975 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025348653073991386, - "loss": 3.1804, - "step": 31980 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002534052622339417, - "loss": 3.322, - "step": 31985 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025332399723165507, - "loss": 3.2808, - "step": 31990 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002532427357391646, - "loss": 3.266, - "step": 31995 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002531614777625807, - "loss": 3.3233, - "step": 32000 - }, - { - "epoch": 0.55, - "eval_loss": 3.3087377548217773, - "eval_runtime": 149.8766, - "eval_samples_per_second": 12.283, - "eval_steps_per_second": 0.774, - "step": 32000 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002530802233080136, - "loss": 3.4058, - "step": 32005 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025299897238157337, - "loss": 3.3462, - "step": 32010 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002529177249893696, - "loss": 3.3441, - "step": 32015 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002528364811375115, - "loss": 3.3583, - "step": 32020 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002527552408321083, - "loss": 3.2635, - "step": 32025 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025267400407926886, - "loss": 3.2558, - "step": 32030 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002525927708851018, - "loss": 3.3147, - "step": 32035 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025251154125571545, - "loss": 3.2104, - "step": 32040 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025243031519721777, - "loss": 3.4015, - "step": 32045 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025234909271571663, - "loss": 3.1764, - "step": 32050 - }, - { - "epoch": 0.55, - "learning_rate": 0.0002522678738173195, - "loss": 3.282, - "step": 32055 - }, - { - "epoch": 0.55, - "learning_rate": 0.00025218665850813355, - "loss": 3.2438, - "step": 32060 - }, - { - "epoch": 0.55, - "learning_rate": 0.000252105446794266, - "loss": 3.2136, - "step": 32065 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002520242386818233, - "loss": 3.3125, - "step": 32070 - }, - { - "epoch": 0.56, - "learning_rate": 0.000251943034176912, - "loss": 3.3091, - "step": 32075 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025186183328563817, - "loss": 3.2398, - "step": 32080 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002517806360141078, - "loss": 3.2099, - "step": 32085 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025169944236842664, - "loss": 3.3446, - "step": 32090 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025161825235469975, - "loss": 3.3282, - "step": 32095 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002515370659790323, - "loss": 3.2385, - "step": 32100 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025145588324752914, - "loss": 3.3214, - "step": 32105 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002513747041662948, - "loss": 3.2566, - "step": 32110 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025129352874143343, - "loss": 3.329, - "step": 32115 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025121235697904925, - "loss": 3.2985, - "step": 32120 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025113118888524564, - "loss": 3.2731, - "step": 32125 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002510500244661263, - "loss": 3.2967, - "step": 32130 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002509688637277941, - "loss": 3.2774, - "step": 32135 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025088770667635213, - "loss": 3.2615, - "step": 32140 - }, - { - "epoch": 0.56, - "learning_rate": 0.000250806553317903, - "loss": 3.2842, - "step": 32145 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025072540365854895, - "loss": 3.1892, - "step": 32150 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002506442577043919, - "loss": 3.1842, - "step": 32155 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025056311546153376, - "loss": 3.246, - "step": 32160 - }, - { - "epoch": 0.56, - "learning_rate": 0.000250481976936076, - "loss": 3.3429, - "step": 32165 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025040084213411983, - "loss": 3.2132, - "step": 32170 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002503197110617661, - "loss": 3.3142, - "step": 32175 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002502385837251155, - "loss": 3.2998, - "step": 32180 - }, - { - "epoch": 0.56, - "learning_rate": 0.00025015746013026834, - "loss": 3.1796, - "step": 32185 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002500763402833248, - "loss": 3.2882, - "step": 32190 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024999522419038466, - "loss": 3.2778, - "step": 32195 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002499141118575473, - "loss": 3.2608, - "step": 32200 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024983300329091206, - "loss": 3.2805, - "step": 32205 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024975189849657785, - "loss": 3.3096, - "step": 32210 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002496707974806434, - "loss": 3.2804, - "step": 32215 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002495897002492071, - "loss": 3.3137, - "step": 32220 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002495086068083669, - "loss": 3.3505, - "step": 32225 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002494275171642207, - "loss": 3.2128, - "step": 32230 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024934643132286605, - "loss": 3.3917, - "step": 32235 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024926534929040013, - "loss": 3.2804, - "step": 32240 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024918427107292003, - "loss": 3.309, - "step": 32245 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024910319667652227, - "loss": 3.2208, - "step": 32250 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024902212610730326, - "loss": 3.3962, - "step": 32255 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002489410593713591, - "loss": 3.3368, - "step": 32260 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002488599964747856, - "loss": 3.2554, - "step": 32265 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002487789374236784, - "loss": 3.3785, - "step": 32270 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024869788222413254, - "loss": 3.2223, - "step": 32275 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002486168308822429, - "loss": 3.1955, - "step": 32280 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002485357834041043, - "loss": 3.2661, - "step": 32285 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002484547397958111, - "loss": 3.3976, - "step": 32290 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002483737000634574, - "loss": 3.3036, - "step": 32295 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024829266421313664, - "loss": 3.2412, - "step": 32300 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024821163225094265, - "loss": 3.2499, - "step": 32305 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024813060418296846, - "loss": 3.2339, - "step": 32310 - }, - { - "epoch": 0.56, - "learning_rate": 0.000248049580015307, - "loss": 3.2515, - "step": 32315 - }, - { - "epoch": 0.56, - "learning_rate": 0.000247968559754051, - "loss": 3.2655, - "step": 32320 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002478875434052925, - "loss": 3.1095, - "step": 32325 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002478065309751237, - "loss": 3.322, - "step": 32330 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002477255224696364, - "loss": 3.2905, - "step": 32335 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002476445178949218, - "loss": 3.2749, - "step": 32340 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024756351725707107, - "loss": 3.2526, - "step": 32345 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024748252056217533, - "loss": 3.3218, - "step": 32350 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002474015278163248, - "loss": 3.3869, - "step": 32355 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002473205390256097, - "loss": 3.2438, - "step": 32360 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024723955419612016, - "loss": 3.3304, - "step": 32365 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002471585733339458, - "loss": 3.2835, - "step": 32370 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002470775964451759, - "loss": 3.1467, - "step": 32375 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002469966235358995, - "loss": 3.322, - "step": 32380 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002469156546122053, - "loss": 3.1773, - "step": 32385 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024683468968018197, - "loss": 3.1448, - "step": 32390 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002467537287459174, - "loss": 3.2498, - "step": 32395 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024667277181549967, - "loss": 3.2981, - "step": 32400 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024659181889501603, - "loss": 3.3788, - "step": 32405 - }, - { - "epoch": 0.56, - "learning_rate": 0.000246510869990554, - "loss": 3.2933, - "step": 32410 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002464299251082004, - "loss": 3.3849, - "step": 32415 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002463489842540418, - "loss": 3.3092, - "step": 32420 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024626804743416484, - "loss": 3.3249, - "step": 32425 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002461871146546551, - "loss": 3.2616, - "step": 32430 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024610618592159876, - "loss": 3.1845, - "step": 32435 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002460252612410809, - "loss": 3.3216, - "step": 32440 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002459443406191868, - "loss": 3.3316, - "step": 32445 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002458634240620013, - "loss": 3.1841, - "step": 32450 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024578251157560886, - "loss": 3.3593, - "step": 32455 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024570160316609365, - "loss": 3.263, - "step": 32460 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024562069883953956, - "loss": 3.2655, - "step": 32465 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024553979860203034, - "loss": 3.1221, - "step": 32470 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002454589024596492, - "loss": 3.1684, - "step": 32475 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024537801041847894, - "loss": 3.2231, - "step": 32480 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002452971224846024, - "loss": 3.3318, - "step": 32485 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002452162386641019, - "loss": 3.2675, - "step": 32490 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002451353589630595, - "loss": 3.2606, - "step": 32495 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024505448338755706, - "loss": 3.2408, - "step": 32500 - }, - { - "epoch": 0.56, - "eval_loss": 3.3065123558044434, - "eval_runtime": 149.9745, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 32500 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024497361194367564, - "loss": 3.2712, - "step": 32505 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002448927446374967, - "loss": 3.2798, - "step": 32510 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024481188147510097, - "loss": 3.2018, - "step": 32515 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002447310224625689, - "loss": 3.2207, - "step": 32520 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024465016760598075, - "loss": 3.3062, - "step": 32525 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024456931691141625, - "loss": 3.2997, - "step": 32530 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024448847038495506, - "loss": 3.2389, - "step": 32535 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002444076280326764, - "loss": 3.2905, - "step": 32540 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024432678986065914, - "loss": 3.1733, - "step": 32545 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024424595587498217, - "loss": 3.2917, - "step": 32550 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002441651260817235, - "loss": 3.262, - "step": 32555 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002440843004869611, - "loss": 3.2987, - "step": 32560 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002440034790967727, - "loss": 3.3115, - "step": 32565 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024392266191723583, - "loss": 3.2615, - "step": 32570 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002438418489544273, - "loss": 3.3166, - "step": 32575 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024376104021442408, - "loss": 3.3056, - "step": 32580 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024368023570330225, - "loss": 3.236, - "step": 32585 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024359943542713805, - "loss": 3.1418, - "step": 32590 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024351863939200735, - "loss": 3.3424, - "step": 32595 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024343784760398545, - "loss": 3.2941, - "step": 32600 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024335706006914766, - "loss": 3.2504, - "step": 32605 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024327627679356854, - "loss": 3.1296, - "step": 32610 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024319549778332273, - "loss": 3.1769, - "step": 32615 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024311472304448432, - "loss": 3.2691, - "step": 32620 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024303395258312724, - "loss": 3.2445, - "step": 32625 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024295318640532506, - "loss": 3.2382, - "step": 32630 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024287242451715086, - "loss": 3.1444, - "step": 32635 - }, - { - "epoch": 0.56, - "learning_rate": 0.0002427916669246775, - "loss": 3.3026, - "step": 32640 - }, - { - "epoch": 0.56, - "learning_rate": 0.00024271091363397757, - "loss": 3.2935, - "step": 32645 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024263016465112344, - "loss": 3.1797, - "step": 32650 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024254941998218694, - "loss": 3.2365, - "step": 32655 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024246867963323952, - "loss": 3.3014, - "step": 32660 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024238794361035253, - "loss": 3.2298, - "step": 32665 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024230721191959703, - "loss": 3.2835, - "step": 32670 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002422264845670434, - "loss": 3.2102, - "step": 32675 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002421457615587622, - "loss": 3.329, - "step": 32680 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024206504290082313, - "loss": 3.2735, - "step": 32685 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002419843285992959, - "loss": 3.2008, - "step": 32690 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002419036186602499, - "loss": 3.263, - "step": 32695 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024182291308975394, - "loss": 3.3097, - "step": 32700 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024174221189387693, - "loss": 3.202, - "step": 32705 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024166151507868687, - "loss": 3.2625, - "step": 32710 - }, - { - "epoch": 0.57, - "learning_rate": 0.000241580822650252, - "loss": 3.2967, - "step": 32715 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002415001346146398, - "loss": 3.1632, - "step": 32720 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024141945097791765, - "loss": 3.2288, - "step": 32725 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024133877174615267, - "loss": 3.2721, - "step": 32730 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002412580969254114, - "loss": 3.2332, - "step": 32735 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024117742652176007, - "loss": 3.2993, - "step": 32740 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024109676054126486, - "loss": 3.3246, - "step": 32745 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024101609898999145, - "loss": 3.2481, - "step": 32750 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024093544187400511, - "loss": 3.1872, - "step": 32755 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024085478919937082, - "loss": 3.1915, - "step": 32760 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002407741409721532, - "loss": 3.2906, - "step": 32765 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024069349719841663, - "loss": 3.3152, - "step": 32770 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024061285788422524, - "loss": 3.3742, - "step": 32775 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024053222303564264, - "loss": 3.2066, - "step": 32780 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024045159265873194, - "loss": 3.2123, - "step": 32785 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024037096675955634, - "loss": 3.3466, - "step": 32790 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024029034534417848, - "loss": 3.3364, - "step": 32795 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024020972841866056, - "loss": 3.263, - "step": 32800 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024012911598906466, - "loss": 3.138, - "step": 32805 - }, - { - "epoch": 0.57, - "learning_rate": 0.00024004850806145256, - "loss": 3.308, - "step": 32810 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002399679046418853, - "loss": 3.3403, - "step": 32815 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002398873057364239, - "loss": 3.3254, - "step": 32820 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023980671135112906, - "loss": 3.3335, - "step": 32825 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023972612149206107, - "loss": 3.1811, - "step": 32830 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023964553616527987, - "loss": 3.2545, - "step": 32835 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023956495537684504, - "loss": 3.2524, - "step": 32840 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023948437913281576, - "loss": 3.187, - "step": 32845 - }, - { - "epoch": 0.57, - "learning_rate": 0.000239403807439251, - "loss": 3.234, - "step": 32850 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023932324030220947, - "loss": 3.3456, - "step": 32855 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002392426777277494, - "loss": 3.3587, - "step": 32860 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023916211972192843, - "loss": 3.1395, - "step": 32865 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023908156629080427, - "loss": 3.3324, - "step": 32870 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002390010174404342, - "loss": 3.2773, - "step": 32875 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023892047317687493, - "loss": 3.3637, - "step": 32880 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023883993350618316, - "loss": 3.216, - "step": 32885 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023875939843441484, - "loss": 3.2905, - "step": 32890 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023867886796762598, - "loss": 3.2365, - "step": 32895 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002385983421118719, - "loss": 3.2825, - "step": 32900 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002385178208732078, - "loss": 3.2922, - "step": 32905 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023843730425768863, - "loss": 3.1794, - "step": 32910 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023835679227136858, - "loss": 3.2932, - "step": 32915 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023827628492030184, - "loss": 3.3405, - "step": 32920 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002381957822105421, - "loss": 3.3375, - "step": 32925 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023811528414814285, - "loss": 3.1655, - "step": 32930 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002380347907391571, - "loss": 3.2732, - "step": 32935 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002379543019896375, - "loss": 3.2293, - "step": 32940 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023787381790563636, - "loss": 3.2525, - "step": 32945 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023779333849320572, - "loss": 3.2663, - "step": 32950 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002377128637583973, - "loss": 3.328, - "step": 32955 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023763239370726237, - "loss": 3.2032, - "step": 32960 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023755192834585165, - "loss": 3.2314, - "step": 32965 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002374714676802159, - "loss": 3.3097, - "step": 32970 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023739101171640537, - "loss": 3.2052, - "step": 32975 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023731056046046985, - "loss": 3.3136, - "step": 32980 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023723011391845903, - "loss": 3.3039, - "step": 32985 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002371496720964218, - "loss": 3.2194, - "step": 32990 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002370692350004072, - "loss": 3.2503, - "step": 32995 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023698880263646357, - "loss": 3.2548, - "step": 33000 - }, - { - "epoch": 0.57, - "eval_loss": 3.29677677154541, - "eval_runtime": 149.7729, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 33000 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023690837501063905, - "loss": 3.2068, - "step": 33005 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002368279521289815, - "loss": 3.3154, - "step": 33010 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002367475339975381, - "loss": 3.3981, - "step": 33015 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023666712062235602, - "loss": 3.1803, - "step": 33020 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002365867120094818, - "loss": 3.2611, - "step": 33025 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002365063081649619, - "loss": 3.331, - "step": 33030 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023642590909484236, - "loss": 3.2699, - "step": 33035 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023634551480516852, - "loss": 3.2775, - "step": 33040 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023626512530198574, - "loss": 3.2124, - "step": 33045 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023618474059133887, - "loss": 3.267, - "step": 33050 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023610436067927253, - "loss": 3.3642, - "step": 33055 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023602398557183074, - "loss": 3.288, - "step": 33060 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002359436152750575, - "loss": 3.2004, - "step": 33065 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023586324979499597, - "loss": 3.1586, - "step": 33070 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023578288913768942, - "loss": 3.2883, - "step": 33075 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023570253330918044, - "loss": 3.2835, - "step": 33080 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023562218231551143, - "loss": 3.328, - "step": 33085 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002355418361627245, - "loss": 3.1934, - "step": 33090 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023546149485686104, - "loss": 3.3223, - "step": 33095 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002353811584039625, - "loss": 3.26, - "step": 33100 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023530082681006957, - "loss": 3.3357, - "step": 33105 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023522050008122292, - "loss": 3.3325, - "step": 33110 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002351401782234628, - "loss": 3.2175, - "step": 33115 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002350598612428288, - "loss": 3.2222, - "step": 33120 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023497954914536033, - "loss": 3.3419, - "step": 33125 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023489924193709658, - "loss": 3.2388, - "step": 33130 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023481893962407622, - "loss": 3.2658, - "step": 33135 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023473864221233766, - "loss": 3.3275, - "step": 33140 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023465834970791863, - "loss": 3.3339, - "step": 33145 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002345780621168568, - "loss": 3.2137, - "step": 33150 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023449777944518948, - "loss": 3.1605, - "step": 33155 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002344175016989534, - "loss": 3.2205, - "step": 33160 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002343372288841852, - "loss": 3.3544, - "step": 33165 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023425696100692078, - "loss": 3.3108, - "step": 33170 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023417669807319598, - "loss": 3.2136, - "step": 33175 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023409644008904611, - "loss": 3.2587, - "step": 33180 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023401618706050617, - "loss": 3.3205, - "step": 33185 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023393593899361095, - "loss": 3.2509, - "step": 33190 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023385569589439445, - "loss": 3.2005, - "step": 33195 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023377545776889064, - "loss": 3.2256, - "step": 33200 - }, - { - "epoch": 0.57, - "learning_rate": 0.000233695224623133, - "loss": 3.231, - "step": 33205 - }, - { - "epoch": 0.57, - "learning_rate": 0.0002336149964631546, - "loss": 3.2971, - "step": 33210 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023353477329498843, - "loss": 3.2918, - "step": 33215 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023345455512466658, - "loss": 3.2987, - "step": 33220 - }, - { - "epoch": 0.57, - "learning_rate": 0.00023337434195822108, - "loss": 3.3542, - "step": 33225 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023329413380168365, - "loss": 3.3258, - "step": 33230 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023321393066108557, - "loss": 3.2357, - "step": 33235 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023313373254245763, - "loss": 3.2245, - "step": 33240 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023305353945183023, - "loss": 3.3036, - "step": 33245 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023297335139523356, - "loss": 3.3151, - "step": 33250 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023289316837869743, - "loss": 3.2807, - "step": 33255 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023281299040825104, - "loss": 3.2591, - "step": 33260 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002327328174899236, - "loss": 3.3142, - "step": 33265 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023265264962974339, - "loss": 3.2627, - "step": 33270 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023257248683373878, - "loss": 3.3279, - "step": 33275 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023249232910793768, - "loss": 3.234, - "step": 33280 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002324121764583674, - "loss": 3.2011, - "step": 33285 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023233202889105507, - "loss": 3.2268, - "step": 33290 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023225188641202746, - "loss": 3.1476, - "step": 33295 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023217174902731078, - "loss": 3.2587, - "step": 33300 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023209161674293087, - "loss": 3.1833, - "step": 33305 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023201148956491336, - "loss": 3.1849, - "step": 33310 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023193136749928342, - "loss": 3.2485, - "step": 33315 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023185125055206592, - "loss": 3.2812, - "step": 33320 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023177113872928498, - "loss": 3.1795, - "step": 33325 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023169103203696473, - "loss": 3.3434, - "step": 33330 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023161093048112887, - "loss": 3.3059, - "step": 33335 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023153083406780045, - "loss": 3.3019, - "step": 33340 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023145074280300258, - "loss": 3.2374, - "step": 33345 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023137065669275739, - "loss": 3.2438, - "step": 33350 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002312905757430871, - "loss": 3.1102, - "step": 33355 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023121049996001344, - "loss": 3.3015, - "step": 33360 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002311304293495576, - "loss": 3.284, - "step": 33365 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023105036391774065, - "loss": 3.3114, - "step": 33370 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023097030367058284, - "loss": 3.2978, - "step": 33375 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023089024861410446, - "loss": 3.2116, - "step": 33380 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023081019875432517, - "loss": 3.3001, - "step": 33385 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023073015409726434, - "loss": 3.317, - "step": 33390 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023065011464894103, - "loss": 3.2739, - "step": 33395 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023057008041537362, - "loss": 3.2719, - "step": 33400 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002304900514025803, - "loss": 3.2217, - "step": 33405 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023041002761657888, - "loss": 3.32, - "step": 33410 - }, - { - "epoch": 0.58, - "learning_rate": 0.00023033000906338676, - "loss": 3.2321, - "step": 33415 - }, - { - "epoch": 0.58, - "learning_rate": 0.000230249995749021, - "loss": 3.2446, - "step": 33420 - }, - { - "epoch": 0.58, - "learning_rate": 0.000230169987679498, - "loss": 3.2752, - "step": 33425 - }, - { - "epoch": 0.58, - "learning_rate": 0.000230089984860834, - "loss": 3.1693, - "step": 33430 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002300099872990449, - "loss": 3.3488, - "step": 33435 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002299299950001461, - "loss": 3.2554, - "step": 33440 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022985000797015262, - "loss": 3.2451, - "step": 33445 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022977002621507893, - "loss": 3.0189, - "step": 33450 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022969004974093932, - "loss": 3.3202, - "step": 33455 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002296100785537477, - "loss": 3.2214, - "step": 33460 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022953011265951738, - "loss": 3.13, - "step": 33465 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002294501520642616, - "loss": 3.2021, - "step": 33470 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022937019677399267, - "loss": 3.2777, - "step": 33475 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022929024679472303, - "loss": 3.279, - "step": 33480 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022921030213246437, - "loss": 3.3105, - "step": 33485 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002291303627932282, - "loss": 3.2978, - "step": 33490 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002290504287830257, - "loss": 3.2142, - "step": 33495 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022897050010786725, - "loss": 3.2172, - "step": 33500 - }, - { - "epoch": 0.58, - "eval_loss": 3.2933146953582764, - "eval_runtime": 149.7693, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.775, - "step": 33500 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022889057677376312, - "loss": 3.3861, - "step": 33505 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002288106587867232, - "loss": 3.1333, - "step": 33510 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022873074615275695, - "loss": 3.2619, - "step": 33515 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022865083887787328, - "loss": 3.2127, - "step": 33520 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022857093696808102, - "loss": 3.3108, - "step": 33525 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022849104042938805, - "loss": 3.3339, - "step": 33530 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002284111492678024, - "loss": 3.2757, - "step": 33535 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002283312634893315, - "loss": 3.2759, - "step": 33540 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002282513830999822, - "loss": 3.2947, - "step": 33545 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022817150810576134, - "loss": 3.2764, - "step": 33550 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022809163851267482, - "loss": 3.2314, - "step": 33555 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022801177432672863, - "loss": 3.2722, - "step": 33560 - }, - { - "epoch": 0.58, - "learning_rate": 0.000227931915553928, - "loss": 3.1763, - "step": 33565 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022785206220027798, - "loss": 3.3469, - "step": 33570 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022777221427178326, - "loss": 3.1805, - "step": 33575 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002276923717744478, - "loss": 3.1481, - "step": 33580 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022761253471427536, - "loss": 3.2782, - "step": 33585 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022753270309726933, - "loss": 3.3483, - "step": 33590 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022745287692943266, - "loss": 3.2516, - "step": 33595 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022737305621676787, - "loss": 3.2987, - "step": 33600 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022729324096527708, - "loss": 3.2064, - "step": 33605 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022721343118096185, - "loss": 3.3007, - "step": 33610 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022713362686982357, - "loss": 3.2484, - "step": 33615 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002270538280378632, - "loss": 3.2258, - "step": 33620 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022697403469108116, - "loss": 3.306, - "step": 33625 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022689424683547735, - "loss": 3.1254, - "step": 33630 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022681446447705153, - "loss": 3.1907, - "step": 33635 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022673468762180295, - "loss": 3.2499, - "step": 33640 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022665491627573033, - "loss": 3.2333, - "step": 33645 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022657515044483223, - "loss": 3.1616, - "step": 33650 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002264953901351064, - "loss": 3.1961, - "step": 33655 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002264156353525506, - "loss": 3.3953, - "step": 33660 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022633588610316185, - "loss": 3.2886, - "step": 33665 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022625614239293692, - "loss": 3.3128, - "step": 33670 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022617640422787226, - "loss": 3.318, - "step": 33675 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022609667161396357, - "loss": 3.1784, - "step": 33680 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022601694455720648, - "loss": 3.3266, - "step": 33685 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022593722306359594, - "loss": 3.2716, - "step": 33690 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002258575071391267, - "loss": 3.2966, - "step": 33695 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022577779678979303, - "loss": 3.2999, - "step": 33700 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002256980920215886, - "loss": 3.1279, - "step": 33705 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022561839284050678, - "loss": 3.3071, - "step": 33710 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022553869925254064, - "loss": 3.2625, - "step": 33715 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002254590112636828, - "loss": 3.2814, - "step": 33720 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002253793288799253, - "loss": 3.25, - "step": 33725 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022529965210725972, - "loss": 3.2336, - "step": 33730 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002252199809516775, - "loss": 3.3184, - "step": 33735 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022514031541916955, - "loss": 3.3121, - "step": 33740 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022506065551572614, - "loss": 3.319, - "step": 33745 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022498100124733738, - "loss": 3.2058, - "step": 33750 - }, - { - "epoch": 0.58, - "learning_rate": 0.000224901352619993, - "loss": 3.2683, - "step": 33755 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022482170963968195, - "loss": 3.3285, - "step": 33760 - }, - { - "epoch": 0.58, - "learning_rate": 0.000224742072312393, - "loss": 3.3569, - "step": 33765 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002246624406441145, - "loss": 3.2333, - "step": 33770 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022458281464083442, - "loss": 3.1978, - "step": 33775 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002245031943085402, - "loss": 3.1741, - "step": 33780 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022442357965321889, - "loss": 3.0947, - "step": 33785 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022434397068085693, - "loss": 3.2276, - "step": 33790 - }, - { - "epoch": 0.58, - "learning_rate": 0.00022426436739744068, - "loss": 3.1653, - "step": 33795 - }, - { - "epoch": 0.58, - "learning_rate": 0.0002241847698089559, - "loss": 3.3112, - "step": 33800 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022410517792138801, - "loss": 3.1834, - "step": 33805 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022402559174072162, - "loss": 3.2776, - "step": 33810 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002239460112729413, - "loss": 3.1643, - "step": 33815 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022386643652403133, - "loss": 3.3707, - "step": 33820 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022378686749997505, - "loss": 3.2157, - "step": 33825 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002237073042067559, - "loss": 3.2477, - "step": 33830 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022362774665035632, - "loss": 3.2571, - "step": 33835 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022354819483675885, - "loss": 3.169, - "step": 33840 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022346864877194526, - "loss": 3.2855, - "step": 33845 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022338910846189707, - "loss": 3.2139, - "step": 33850 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022330957391259544, - "loss": 3.331, - "step": 33855 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002232300451300207, - "loss": 3.3466, - "step": 33860 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022315052212015316, - "loss": 3.2267, - "step": 33865 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022307100488897243, - "loss": 3.1652, - "step": 33870 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002229914934424579, - "loss": 3.2467, - "step": 33875 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022291198778658853, - "loss": 3.3519, - "step": 33880 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022283248792734256, - "loss": 3.1367, - "step": 33885 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022275299387069798, - "loss": 3.2817, - "step": 33890 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022267350562263233, - "loss": 3.1939, - "step": 33895 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022259402318912283, - "loss": 3.1678, - "step": 33900 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022251454657614615, - "loss": 3.2199, - "step": 33905 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022243507578967837, - "loss": 3.289, - "step": 33910 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022235561083569532, - "loss": 3.3205, - "step": 33915 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022227615172017254, - "loss": 3.2538, - "step": 33920 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002221966984490847, - "loss": 3.1713, - "step": 33925 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022211725102840659, - "loss": 3.1795, - "step": 33930 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022203780946411188, - "loss": 3.2756, - "step": 33935 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002219583737621744, - "loss": 3.2947, - "step": 33940 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022187894392856726, - "loss": 3.2601, - "step": 33945 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002217995199692631, - "loss": 3.2433, - "step": 33950 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022172010189023446, - "loss": 3.1556, - "step": 33955 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022164068969745282, - "loss": 3.3248, - "step": 33960 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022156128339688978, - "loss": 3.2672, - "step": 33965 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022148188299451623, - "loss": 3.1232, - "step": 33970 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002214024884963026, - "loss": 3.3216, - "step": 33975 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022132309990821914, - "loss": 3.3364, - "step": 33980 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022124371723623542, - "loss": 3.3008, - "step": 33985 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002211643404863204, - "loss": 3.1925, - "step": 33990 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022108496966444295, - "loss": 3.1433, - "step": 33995 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002210056047765714, - "loss": 3.1537, - "step": 34000 - }, - { - "epoch": 0.59, - "eval_loss": 3.282989501953125, - "eval_runtime": 151.8721, - "eval_samples_per_second": 12.122, - "eval_steps_per_second": 0.764, - "step": 34000 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022092624582867348, - "loss": 3.2759, - "step": 34005 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022084689282671677, - "loss": 3.3142, - "step": 34010 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022076754577666794, - "loss": 3.3801, - "step": 34015 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022068820468449366, - "loss": 3.283, - "step": 34020 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022060886955615986, - "loss": 3.2798, - "step": 34025 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022052954039763218, - "loss": 3.1747, - "step": 34030 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022045021721487593, - "loss": 3.2212, - "step": 34035 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022037090001385552, - "loss": 3.3031, - "step": 34040 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002202915888005354, - "loss": 3.219, - "step": 34045 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022021228358087922, - "loss": 3.2706, - "step": 34050 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022013298436085045, - "loss": 3.2221, - "step": 34055 - }, - { - "epoch": 0.59, - "learning_rate": 0.00022005369114641205, - "loss": 3.2401, - "step": 34060 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002199744039435263, - "loss": 3.2445, - "step": 34065 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002198951227581552, - "loss": 3.2795, - "step": 34070 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002198158475962603, - "loss": 3.3098, - "step": 34075 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021973657846380282, - "loss": 3.2402, - "step": 34080 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021965731536674333, - "loss": 3.2837, - "step": 34085 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002195780583110419, - "loss": 3.2242, - "step": 34090 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002194988073026583, - "loss": 3.3176, - "step": 34095 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021941956234755185, - "loss": 3.2141, - "step": 34100 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021934032345168132, - "loss": 3.2779, - "step": 34105 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021926109062100525, - "loss": 3.189, - "step": 34110 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021918186386148122, - "loss": 3.2255, - "step": 34115 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021910264317906685, - "loss": 3.2848, - "step": 34120 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021902342857971918, - "loss": 3.125, - "step": 34125 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002189442200693946, - "loss": 3.2491, - "step": 34130 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002188650176540494, - "loss": 3.2903, - "step": 34135 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021878582133963895, - "loss": 3.1633, - "step": 34140 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002187066311321186, - "loss": 3.183, - "step": 34145 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021862744703744286, - "loss": 3.227, - "step": 34150 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002185482690615661, - "loss": 3.1407, - "step": 34155 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002184690972104422, - "loss": 3.259, - "step": 34160 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002183899314900243, - "loss": 3.2113, - "step": 34165 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002183107719062652, - "loss": 3.2791, - "step": 34170 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021823161846511746, - "loss": 3.2624, - "step": 34175 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021815247117253298, - "loss": 3.2633, - "step": 34180 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002180733300344632, - "loss": 3.1451, - "step": 34185 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021799419505685915, - "loss": 3.2792, - "step": 34190 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021791506624567128, - "loss": 3.2983, - "step": 34195 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002178359436068498, - "loss": 3.0929, - "step": 34200 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021775682714634428, - "loss": 3.2645, - "step": 34205 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021767771687010386, - "loss": 3.2737, - "step": 34210 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002175986127840773, - "loss": 3.2691, - "step": 34215 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021751951489421266, - "loss": 3.2498, - "step": 34220 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021744042320645787, - "loss": 3.3095, - "step": 34225 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021736133772676006, - "loss": 3.3028, - "step": 34230 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021728225846106612, - "loss": 3.2369, - "step": 34235 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021720318541532262, - "loss": 3.245, - "step": 34240 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021712411859547512, - "loss": 3.2824, - "step": 34245 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021704505800746913, - "loss": 3.3172, - "step": 34250 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021696600365724966, - "loss": 3.2327, - "step": 34255 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002168869555507612, - "loss": 3.198, - "step": 34260 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021680791369394783, - "loss": 3.2636, - "step": 34265 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021672887809275285, - "loss": 3.317, - "step": 34270 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021664984875311948, - "loss": 3.2125, - "step": 34275 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002165708256809904, - "loss": 3.2339, - "step": 34280 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002164918088823076, - "loss": 3.2303, - "step": 34285 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002164127983630129, - "loss": 3.2303, - "step": 34290 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021633379412904727, - "loss": 3.3098, - "step": 34295 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021625479618635153, - "loss": 3.296, - "step": 34300 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021617580454086599, - "loss": 3.1825, - "step": 34305 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002160968191985303, - "loss": 3.2411, - "step": 34310 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021601784016528396, - "loss": 3.2966, - "step": 34315 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021593886744706548, - "loss": 3.2849, - "step": 34320 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021585990104981345, - "loss": 3.2534, - "step": 34325 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021578094097946557, - "loss": 3.2557, - "step": 34330 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021570198724195933, - "loss": 3.2512, - "step": 34335 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021562303984323176, - "loss": 3.2774, - "step": 34340 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021554409878921904, - "loss": 3.2232, - "step": 34345 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021546516408585726, - "loss": 3.28, - "step": 34350 - }, - { - "epoch": 0.59, - "learning_rate": 0.0002153862357390819, - "loss": 3.2287, - "step": 34355 - }, - { - "epoch": 0.59, - "learning_rate": 0.000215307313754828, - "loss": 3.2912, - "step": 34360 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021522839813903013, - "loss": 3.2309, - "step": 34365 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021514948889762224, - "loss": 3.2931, - "step": 34370 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021507058603653783, - "loss": 3.2682, - "step": 34375 - }, - { - "epoch": 0.59, - "learning_rate": 0.00021499168956171016, - "loss": 3.2441, - "step": 34380 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021491279947907178, - "loss": 3.3051, - "step": 34385 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002148339157945549, - "loss": 3.2857, - "step": 34390 - }, - { - "epoch": 0.6, - "learning_rate": 0.000214755038514091, - "loss": 3.274, - "step": 34395 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021467616764361128, - "loss": 3.2249, - "step": 34400 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002145973031890465, - "loss": 3.2408, - "step": 34405 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021451844515632686, - "loss": 3.2944, - "step": 34410 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021443959355138214, - "loss": 3.2375, - "step": 34415 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002143607483801414, - "loss": 3.3012, - "step": 34420 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002142819096485335, - "loss": 3.3001, - "step": 34425 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021420307736248667, - "loss": 3.2736, - "step": 34430 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002141242515279287, - "loss": 3.3536, - "step": 34435 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021404543215078697, - "loss": 3.2831, - "step": 34440 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021396661923698827, - "loss": 3.2456, - "step": 34445 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021388781279245886, - "loss": 3.2275, - "step": 34450 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021380901282312456, - "loss": 3.2121, - "step": 34455 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021373021933491075, - "loss": 3.3182, - "step": 34460 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021365143233374238, - "loss": 3.2926, - "step": 34465 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021357265182554385, - "loss": 3.2554, - "step": 34470 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021349387781623887, - "loss": 3.2501, - "step": 34475 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002134151103117509, - "loss": 3.2919, - "step": 34480 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021333634931800297, - "loss": 3.2388, - "step": 34485 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002132575948409174, - "loss": 3.3324, - "step": 34490 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002131788468864162, - "loss": 3.1517, - "step": 34495 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002131001054604207, - "loss": 3.1907, - "step": 34500 - }, - { - "epoch": 0.6, - "eval_loss": 3.2766971588134766, - "eval_runtime": 150.1846, - "eval_samples_per_second": 12.258, - "eval_steps_per_second": 0.772, - "step": 34500 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021302137056885198, - "loss": 3.1657, - "step": 34505 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021294264221763032, - "loss": 3.2792, - "step": 34510 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002128639204126758, - "loss": 3.3099, - "step": 34515 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021278520515990808, - "loss": 3.345, - "step": 34520 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002127064964652459, - "loss": 3.2582, - "step": 34525 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002126277943346077, - "loss": 3.2024, - "step": 34530 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021254909877391163, - "loss": 3.2492, - "step": 34535 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021247040978907524, - "loss": 3.2715, - "step": 34540 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021239172738601539, - "loss": 3.2769, - "step": 34545 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021231305157064868, - "loss": 3.1182, - "step": 34550 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021223438234889102, - "loss": 3.2583, - "step": 34555 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021215571972665808, - "loss": 3.3762, - "step": 34560 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021207706370986483, - "loss": 3.3264, - "step": 34565 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021199841430442586, - "loss": 3.2889, - "step": 34570 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021191977151625505, - "loss": 3.2835, - "step": 34575 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021184113535126597, - "loss": 3.1812, - "step": 34580 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021176250581537176, - "loss": 3.2657, - "step": 34585 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021168388291448485, - "loss": 3.2983, - "step": 34590 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002116052666545175, - "loss": 3.2066, - "step": 34595 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002115266570413809, - "loss": 3.2254, - "step": 34600 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021144805408098634, - "loss": 3.2641, - "step": 34605 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021136945777924427, - "loss": 3.2156, - "step": 34610 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021129086814206474, - "loss": 3.3646, - "step": 34615 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021121228517535738, - "loss": 3.1861, - "step": 34620 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021113370888503106, - "loss": 3.2872, - "step": 34625 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021105513927699446, - "loss": 3.1778, - "step": 34630 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002109765763571555, - "loss": 3.3283, - "step": 34635 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021089802013142173, - "loss": 3.1781, - "step": 34640 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021081947060570035, - "loss": 3.2741, - "step": 34645 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021074092778589767, - "loss": 3.252, - "step": 34650 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002106623916779197, - "loss": 3.2506, - "step": 34655 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021058386228767206, - "loss": 3.2566, - "step": 34660 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021050533962105975, - "loss": 3.2547, - "step": 34665 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021042682368398717, - "loss": 3.2342, - "step": 34670 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021034831448235857, - "loss": 3.2487, - "step": 34675 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021026981202207712, - "loss": 3.308, - "step": 34680 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021019131630904602, - "loss": 3.3426, - "step": 34685 - }, - { - "epoch": 0.6, - "learning_rate": 0.00021011282734916757, - "loss": 3.2593, - "step": 34690 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002100343451483439, - "loss": 3.2209, - "step": 34695 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020995586971247647, - "loss": 3.1753, - "step": 34700 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020987740104746607, - "loss": 3.3474, - "step": 34705 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020979893915921332, - "loss": 3.2738, - "step": 34710 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020972048405361798, - "loss": 3.1795, - "step": 34715 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020964203573657959, - "loss": 3.1401, - "step": 34720 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020956359421399714, - "loss": 3.2246, - "step": 34725 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002094851594917689, - "loss": 3.2825, - "step": 34730 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020940673157579268, - "loss": 3.2241, - "step": 34735 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020932831047196595, - "loss": 3.2764, - "step": 34740 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002092498961861857, - "loss": 3.2176, - "step": 34745 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002091714887243482, - "loss": 3.2499, - "step": 34750 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020909308809234913, - "loss": 3.2197, - "step": 34755 - }, - { - "epoch": 0.6, - "learning_rate": 0.000209014694296084, - "loss": 3.2597, - "step": 34760 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020893630734144755, - "loss": 3.1445, - "step": 34765 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020885792723433406, - "loss": 3.2218, - "step": 34770 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002087795539806375, - "loss": 3.2191, - "step": 34775 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020870118758625086, - "loss": 3.3003, - "step": 34780 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020862282805706706, - "loss": 3.293, - "step": 34785 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002085444753989782, - "loss": 3.2902, - "step": 34790 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020846612961787612, - "loss": 3.2605, - "step": 34795 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002083877907196521, - "loss": 3.2403, - "step": 34800 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020830945871019656, - "loss": 3.1848, - "step": 34805 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020823113359539993, - "loss": 3.2072, - "step": 34810 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020815281538115164, - "loss": 3.235, - "step": 34815 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020807450407334097, - "loss": 3.2259, - "step": 34820 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002079961996778566, - "loss": 3.1593, - "step": 34825 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002079179022005864, - "loss": 3.2826, - "step": 34830 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020783961164741804, - "loss": 3.2567, - "step": 34835 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020776132802423851, - "loss": 3.2129, - "step": 34840 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020768305133693447, - "loss": 3.2653, - "step": 34845 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020760478159139196, - "loss": 3.1523, - "step": 34850 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002075265187934962, - "loss": 3.284, - "step": 34855 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002074482629491323, - "loss": 3.1118, - "step": 34860 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002073700140641848, - "loss": 3.2638, - "step": 34865 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020729177214453745, - "loss": 3.1445, - "step": 34870 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002072135371960738, - "loss": 3.2557, - "step": 34875 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020713530922467652, - "loss": 3.2218, - "step": 34880 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020705708823622805, - "loss": 3.3213, - "step": 34885 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020697887423661032, - "loss": 3.2697, - "step": 34890 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020690066723170447, - "loss": 3.2742, - "step": 34895 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002068224672273914, - "loss": 3.2105, - "step": 34900 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020674427422955115, - "loss": 3.2227, - "step": 34905 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020666608824406363, - "loss": 3.2625, - "step": 34910 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020658790927680787, - "loss": 3.1875, - "step": 34915 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020650973733366262, - "loss": 3.233, - "step": 34920 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020643157242050602, - "loss": 3.2415, - "step": 34925 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020635341454321575, - "loss": 3.2621, - "step": 34930 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020627526370766862, - "loss": 3.3244, - "step": 34935 - }, - { - "epoch": 0.6, - "learning_rate": 0.0002061971199197413, - "loss": 3.2816, - "step": 34940 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020611898318530995, - "loss": 3.1986, - "step": 34945 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020604085351024986, - "loss": 3.2335, - "step": 34950 - }, - { - "epoch": 0.6, - "learning_rate": 0.00020596273090043614, - "loss": 3.2378, - "step": 34955 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020588461536174304, - "loss": 3.1589, - "step": 34960 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002058065069000445, - "loss": 3.2316, - "step": 34965 - }, - { - "epoch": 0.61, - "learning_rate": 0.000205728405521214, - "loss": 3.2408, - "step": 34970 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002056503112311242, - "loss": 3.2371, - "step": 34975 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020557222403564757, - "loss": 3.2762, - "step": 34980 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002054941439406556, - "loss": 3.2734, - "step": 34985 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002054160709520198, - "loss": 3.1853, - "step": 34990 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020533800507561067, - "loss": 3.2824, - "step": 34995 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020525994631729837, - "loss": 3.19, - "step": 35000 - }, - { - "epoch": 0.61, - "eval_loss": 3.272491693496704, - "eval_runtime": 149.9711, - "eval_samples_per_second": 12.276, - "eval_steps_per_second": 0.773, - "step": 35000 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002051818946829527, - "loss": 3.2679, - "step": 35005 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020510385017844258, - "loss": 3.1934, - "step": 35010 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020502581280963656, - "loss": 3.2659, - "step": 35015 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002049477825824026, - "loss": 3.1719, - "step": 35020 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002048697595026084, - "loss": 3.2524, - "step": 35025 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020479174357612077, - "loss": 3.147, - "step": 35030 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020471373480880593, - "loss": 3.0605, - "step": 35035 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020463573320652996, - "loss": 3.2339, - "step": 35040 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020455773877515814, - "loss": 3.3027, - "step": 35045 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020447975152055518, - "loss": 3.2568, - "step": 35050 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020440177144858554, - "loss": 3.2592, - "step": 35055 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020432379856511257, - "loss": 3.2078, - "step": 35060 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002042458328759996, - "loss": 3.1336, - "step": 35065 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020416787438710937, - "loss": 3.2229, - "step": 35070 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020408992310430382, - "loss": 3.3117, - "step": 35075 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020401197903344463, - "loss": 3.2499, - "step": 35080 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020393404218039252, - "loss": 3.2453, - "step": 35085 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002038561125510082, - "loss": 3.1057, - "step": 35090 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002037781901511514, - "loss": 3.2666, - "step": 35095 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020370027498668158, - "loss": 3.0863, - "step": 35100 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002036223670634577, - "loss": 3.2022, - "step": 35105 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002035444663873378, - "loss": 3.3174, - "step": 35110 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020346657296417968, - "loss": 3.242, - "step": 35115 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020338868679984048, - "loss": 3.2349, - "step": 35120 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020331080790017702, - "loss": 3.3197, - "step": 35125 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020323293627104528, - "loss": 3.202, - "step": 35130 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002031550719183008, - "loss": 3.0995, - "step": 35135 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020307721484779854, - "loss": 3.0756, - "step": 35140 - }, - { - "epoch": 0.61, - "learning_rate": 0.000202999365065393, - "loss": 3.2164, - "step": 35145 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002029215225769381, - "loss": 3.1971, - "step": 35150 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020284368738828716, - "loss": 3.1615, - "step": 35155 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020276585950529312, - "loss": 3.2523, - "step": 35160 - }, - { - "epoch": 0.61, - "learning_rate": 0.000202688038933808, - "loss": 3.3051, - "step": 35165 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020261022567968372, - "loss": 3.1922, - "step": 35170 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002025324197487713, - "loss": 3.2387, - "step": 35175 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020245462114692138, - "loss": 3.3196, - "step": 35180 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002023768298799842, - "loss": 3.1601, - "step": 35185 - }, - { - "epoch": 0.61, - "learning_rate": 0.000202299045953809, - "loss": 3.2727, - "step": 35190 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020222126937424477, - "loss": 3.2983, - "step": 35195 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020214350014714, - "loss": 3.294, - "step": 35200 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020206573827834254, - "loss": 3.2709, - "step": 35205 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002019879837736997, - "loss": 3.2138, - "step": 35210 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020191023663905814, - "loss": 3.2238, - "step": 35215 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020183249688026405, - "loss": 3.2299, - "step": 35220 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002017547645031631, - "loss": 3.2146, - "step": 35225 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020167703951360039, - "loss": 3.1378, - "step": 35230 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002015993219174205, - "loss": 3.3198, - "step": 35235 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002015216117204671, - "loss": 3.1608, - "step": 35240 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020144390892858392, - "loss": 3.2125, - "step": 35245 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002013662135476137, - "loss": 3.2722, - "step": 35250 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020128852558339868, - "loss": 2.9673, - "step": 35255 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020121084504178078, - "loss": 3.0968, - "step": 35260 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020113317192860093, - "loss": 3.2643, - "step": 35265 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020105550624969988, - "loss": 3.0082, - "step": 35270 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020097784801091767, - "loss": 3.228, - "step": 35275 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020090019721809375, - "loss": 3.2276, - "step": 35280 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020082255387706733, - "loss": 3.1601, - "step": 35285 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020074491799367642, - "loss": 3.0885, - "step": 35290 - }, - { - "epoch": 0.61, - "learning_rate": 0.000200667289573759, - "loss": 3.2486, - "step": 35295 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020058966862315238, - "loss": 3.2406, - "step": 35300 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020051205514769313, - "loss": 3.1888, - "step": 35305 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020043444915321762, - "loss": 3.1852, - "step": 35310 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020035685064556118, - "loss": 3.2038, - "step": 35315 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002002792596305589, - "loss": 3.2482, - "step": 35320 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002002016761140452, - "loss": 3.2151, - "step": 35325 - }, - { - "epoch": 0.61, - "learning_rate": 0.00020012410010185406, - "loss": 3.2906, - "step": 35330 - }, - { - "epoch": 0.61, - "learning_rate": 0.0002000465315998188, - "loss": 3.2794, - "step": 35335 - }, - { - "epoch": 0.61, - "learning_rate": 0.000199968970613772, - "loss": 3.1682, - "step": 35340 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019989141714954592, - "loss": 3.2599, - "step": 35345 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001998138712129723, - "loss": 3.2017, - "step": 35350 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019973633280988206, - "loss": 3.3271, - "step": 35355 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001996588019461059, - "loss": 3.1123, - "step": 35360 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019958127862747344, - "loss": 3.2482, - "step": 35365 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019950376285981426, - "loss": 3.3316, - "step": 35370 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019942625464895703, - "loss": 3.2091, - "step": 35375 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019934875400072998, - "loss": 3.34, - "step": 35380 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019927126092096094, - "loss": 3.1714, - "step": 35385 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019919377541547677, - "loss": 3.142, - "step": 35390 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019911629749010408, - "loss": 3.2751, - "step": 35395 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019903882715066875, - "loss": 3.2666, - "step": 35400 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019896136440299622, - "loss": 3.3066, - "step": 35405 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001988839092529113, - "loss": 3.296, - "step": 35410 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019880646170623829, - "loss": 3.3092, - "step": 35415 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019872902176880058, - "loss": 3.2801, - "step": 35420 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019865158944642146, - "loss": 3.1055, - "step": 35425 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001985741647449235, - "loss": 3.2495, - "step": 35430 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001984967476701284, - "loss": 3.3053, - "step": 35435 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019841933822785788, - "loss": 3.2339, - "step": 35440 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001983419364239324, - "loss": 3.2144, - "step": 35445 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019826454226417236, - "loss": 3.2215, - "step": 35450 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019818715575439729, - "loss": 3.2162, - "step": 35455 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019810977690042634, - "loss": 3.3478, - "step": 35460 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001980324057080781, - "loss": 3.3434, - "step": 35465 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019795504218317024, - "loss": 3.2074, - "step": 35470 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001978776863315203, - "loss": 3.2835, - "step": 35475 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019780033815894495, - "loss": 3.2004, - "step": 35480 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019772299767126039, - "loss": 3.3079, - "step": 35485 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019764566487428235, - "loss": 3.2486, - "step": 35490 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001975683397738257, - "loss": 3.3032, - "step": 35495 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001974910223757049, - "loss": 3.2286, - "step": 35500 - }, - { - "epoch": 0.61, - "eval_loss": 3.266571283340454, - "eval_runtime": 150.2623, - "eval_samples_per_second": 12.252, - "eval_steps_per_second": 0.772, - "step": 35500 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019741371268573388, - "loss": 3.1576, - "step": 35505 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001973364107097259, - "loss": 3.2607, - "step": 35510 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019725911645349382, - "loss": 3.1451, - "step": 35515 - }, - { - "epoch": 0.61, - "learning_rate": 0.0001971818299228495, - "loss": 3.2446, - "step": 35520 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019710455112360468, - "loss": 3.3296, - "step": 35525 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019702728006157033, - "loss": 3.3272, - "step": 35530 - }, - { - "epoch": 0.61, - "learning_rate": 0.00019695001674255669, - "loss": 3.2667, - "step": 35535 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019687276117237384, - "loss": 3.3462, - "step": 35540 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019679551335683064, - "loss": 3.226, - "step": 35545 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019671827330173597, - "loss": 3.2596, - "step": 35550 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001966410410128979, - "loss": 3.2741, - "step": 35555 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019656381649612375, - "loss": 3.1513, - "step": 35560 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019648659975722064, - "loss": 3.2949, - "step": 35565 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019640939080199462, - "loss": 3.2267, - "step": 35570 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019633218963625157, - "loss": 3.2633, - "step": 35575 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001962549962657965, - "loss": 3.2205, - "step": 35580 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019617781069643402, - "loss": 3.2456, - "step": 35585 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019610063293396823, - "loss": 3.1869, - "step": 35590 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001960234629842023, - "loss": 3.2434, - "step": 35595 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019594630085293906, - "loss": 3.1685, - "step": 35600 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019586914654598066, - "loss": 3.2325, - "step": 35605 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001957920000691289, - "loss": 3.2937, - "step": 35610 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019571486142818458, - "loss": 3.2478, - "step": 35615 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019563773062894834, - "loss": 3.2374, - "step": 35620 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019556060767721983, - "loss": 3.1036, - "step": 35625 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001954834925787984, - "loss": 3.1363, - "step": 35630 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019540638533948267, - "loss": 3.1992, - "step": 35635 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019532928596507071, - "loss": 3.2498, - "step": 35640 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019525219446136013, - "loss": 3.3513, - "step": 35645 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019517511083414757, - "loss": 3.2661, - "step": 35650 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019509803508922953, - "loss": 3.2577, - "step": 35655 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019502096723240158, - "loss": 3.1112, - "step": 35660 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019494390726945888, - "loss": 3.2319, - "step": 35665 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019486685520619606, - "loss": 3.2341, - "step": 35670 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001947898110484069, - "loss": 3.1483, - "step": 35675 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001947127748018847, - "loss": 3.2717, - "step": 35680 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001946357464724222, - "loss": 3.2706, - "step": 35685 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019455872606581167, - "loss": 3.2078, - "step": 35690 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019448171358784464, - "loss": 3.2178, - "step": 35695 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001944047090443118, - "loss": 3.1824, - "step": 35700 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019432771244100373, - "loss": 3.2617, - "step": 35705 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019425072378371014, - "loss": 3.2644, - "step": 35710 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019417374307822012, - "loss": 3.2957, - "step": 35715 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019409677033032242, - "loss": 3.2295, - "step": 35720 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019401980554580468, - "loss": 3.2526, - "step": 35725 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019394284873045447, - "loss": 3.2768, - "step": 35730 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001938658998900585, - "loss": 3.1854, - "step": 35735 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001937889590304029, - "loss": 3.0675, - "step": 35740 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001937120261572734, - "loss": 3.3383, - "step": 35745 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019363510127645467, - "loss": 3.1992, - "step": 35750 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019355818439373127, - "loss": 3.262, - "step": 35755 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019348127551488682, - "loss": 3.2469, - "step": 35760 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019340437464570462, - "loss": 3.2135, - "step": 35765 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019332748179196724, - "loss": 3.2866, - "step": 35770 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019325059695945648, - "loss": 3.2102, - "step": 35775 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019317372015395366, - "loss": 3.2206, - "step": 35780 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019309685138123962, - "loss": 3.1866, - "step": 35785 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019301999064709455, - "loss": 3.2248, - "step": 35790 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019294313795729793, - "loss": 3.1514, - "step": 35795 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019286629331762868, - "loss": 3.1978, - "step": 35800 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019278945673386504, - "loss": 3.2829, - "step": 35805 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001927126282117848, - "loss": 3.2408, - "step": 35810 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001926358077571652, - "loss": 3.2113, - "step": 35815 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019255899537578262, - "loss": 3.223, - "step": 35820 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019248219107341287, - "loss": 3.1289, - "step": 35825 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001924053948558313, - "loss": 3.2393, - "step": 35830 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019232860672881272, - "loss": 3.1435, - "step": 35835 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019225182669813104, - "loss": 3.1946, - "step": 35840 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001921750547695598, - "loss": 3.1769, - "step": 35845 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019209829094887196, - "loss": 3.198, - "step": 35850 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001920215352418396, - "loss": 3.2421, - "step": 35855 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019194478765423433, - "loss": 3.0731, - "step": 35860 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019186804819182725, - "loss": 3.2731, - "step": 35865 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019179131686038883, - "loss": 3.1812, - "step": 35870 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019171459366568893, - "loss": 3.2939, - "step": 35875 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019163787861349647, - "loss": 3.1732, - "step": 35880 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019156117170958023, - "loss": 3.2444, - "step": 35885 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019148447295970817, - "loss": 3.1963, - "step": 35890 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019140778236964752, - "loss": 3.3122, - "step": 35895 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019133109994516528, - "loss": 3.2267, - "step": 35900 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019125442569202725, - "loss": 3.2838, - "step": 35905 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019117775961599912, - "loss": 3.3105, - "step": 35910 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001911011017228458, - "loss": 3.2765, - "step": 35915 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019102445201833152, - "loss": 3.3233, - "step": 35920 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019094781050822007, - "loss": 3.2274, - "step": 35925 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001908711771982742, - "loss": 3.2296, - "step": 35930 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001907945520942566, - "loss": 3.1826, - "step": 35935 - }, - { - "epoch": 0.62, - "learning_rate": 0.000190717935201929, - "loss": 3.3203, - "step": 35940 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001906413265270526, - "loss": 3.1889, - "step": 35945 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019056472607538807, - "loss": 2.9914, - "step": 35950 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001904881338526953, - "loss": 3.2326, - "step": 35955 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019041154986473356, - "loss": 3.2476, - "step": 35960 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019033497411726158, - "loss": 3.1318, - "step": 35965 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019025840661603764, - "loss": 3.266, - "step": 35970 - }, - { - "epoch": 0.62, - "learning_rate": 0.00019018184736681906, - "loss": 3.1782, - "step": 35975 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001901052963753628, - "loss": 3.1955, - "step": 35980 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001900287536474249, - "loss": 3.2449, - "step": 35985 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001899522191887612, - "loss": 2.9662, - "step": 35990 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001898756930051267, - "loss": 3.1855, - "step": 35995 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018979917510227574, - "loss": 3.1923, - "step": 36000 - }, - { - "epoch": 0.62, - "eval_loss": 3.261223554611206, - "eval_runtime": 150.469, - "eval_samples_per_second": 12.235, - "eval_steps_per_second": 0.771, - "step": 36000 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018972266548596193, - "loss": 3.2353, - "step": 36005 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018964616416193848, - "loss": 3.2502, - "step": 36010 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018956967113595806, - "loss": 3.2247, - "step": 36015 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018949318641377233, - "loss": 3.2834, - "step": 36020 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018941671000113276, - "loss": 3.2689, - "step": 36025 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018934024190378972, - "loss": 3.2487, - "step": 36030 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018926378212749347, - "loss": 3.1112, - "step": 36035 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018918733067799324, - "loss": 3.1531, - "step": 36040 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001891108875610378, - "loss": 3.2902, - "step": 36045 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018903445278237543, - "loss": 3.278, - "step": 36050 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018895802634775346, - "loss": 3.1309, - "step": 36055 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001888816082629188, - "loss": 3.1394, - "step": 36060 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018880519853361775, - "loss": 3.2527, - "step": 36065 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018872879716559584, - "loss": 3.1727, - "step": 36070 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018865240416459823, - "loss": 3.2331, - "step": 36075 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018857601953636922, - "loss": 3.1652, - "step": 36080 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018849964328665242, - "loss": 3.3314, - "step": 36085 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018842327542119096, - "loss": 3.2538, - "step": 36090 - }, - { - "epoch": 0.62, - "learning_rate": 0.00018834691594572746, - "loss": 3.2119, - "step": 36095 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001882705648660036, - "loss": 3.2479, - "step": 36100 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001881942221877608, - "loss": 3.2294, - "step": 36105 - }, - { - "epoch": 0.62, - "learning_rate": 0.0001881178879167393, - "loss": 3.1991, - "step": 36110 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001880415620586794, - "loss": 3.2136, - "step": 36115 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001879652446193201, - "loss": 3.1497, - "step": 36120 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001878889356044003, - "loss": 3.301, - "step": 36125 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001878126350196581, - "loss": 3.1176, - "step": 36130 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018773634287083072, - "loss": 3.3595, - "step": 36135 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018766005916365493, - "loss": 3.2179, - "step": 36140 - }, - { - "epoch": 0.63, - "learning_rate": 0.000187583783903867, - "loss": 3.2545, - "step": 36145 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018750751709720247, - "loss": 3.2765, - "step": 36150 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001874312587493961, - "loss": 3.2513, - "step": 36155 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018735500886618215, - "loss": 3.1349, - "step": 36160 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018727876745329425, - "loss": 3.278, - "step": 36165 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018720253451646524, - "loss": 3.1892, - "step": 36170 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018712631006142773, - "loss": 3.2387, - "step": 36175 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018705009409391324, - "loss": 3.2239, - "step": 36180 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001869738866196527, - "loss": 3.1069, - "step": 36185 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018689768764437663, - "loss": 3.3351, - "step": 36190 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018682149717381489, - "loss": 3.2367, - "step": 36195 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018674531521369644, - "loss": 3.2287, - "step": 36200 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018666914176975, - "loss": 3.3816, - "step": 36205 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018659297684770317, - "loss": 3.2868, - "step": 36210 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018651682045328334, - "loss": 3.2455, - "step": 36215 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018644067259221696, - "loss": 3.2449, - "step": 36220 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018636453327023004, - "loss": 3.2737, - "step": 36225 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018628840249304793, - "loss": 3.2192, - "step": 36230 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001862122802663951, - "loss": 3.2535, - "step": 36235 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018613616659599567, - "loss": 3.1544, - "step": 36240 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018606006148757292, - "loss": 3.2894, - "step": 36245 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001859839649468496, - "loss": 3.1255, - "step": 36250 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018590787697954795, - "loss": 3.132, - "step": 36255 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018583179759138916, - "loss": 3.1732, - "step": 36260 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018575572678809402, - "loss": 3.2779, - "step": 36265 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018567966457538274, - "loss": 3.2261, - "step": 36270 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018560361095897485, - "loss": 3.1753, - "step": 36275 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018552756594458923, - "loss": 3.2143, - "step": 36280 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018545152953794385, - "loss": 3.2923, - "step": 36285 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018537550174475639, - "loss": 3.1306, - "step": 36290 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018529948257074385, - "loss": 3.2825, - "step": 36295 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018522347202162232, - "loss": 3.3198, - "step": 36300 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018514747010310744, - "loss": 3.289, - "step": 36305 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018507147682091435, - "loss": 3.267, - "step": 36310 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018499549218075703, - "loss": 3.3149, - "step": 36315 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018491951618834947, - "loss": 3.1908, - "step": 36320 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018484354884940437, - "loss": 3.2189, - "step": 36325 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018476759016963426, - "loss": 3.1788, - "step": 36330 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018469164015475092, - "loss": 3.232, - "step": 36335 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018461569881046526, - "loss": 3.3037, - "step": 36340 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018453976614248762, - "loss": 3.2038, - "step": 36345 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018446384215652786, - "loss": 3.124, - "step": 36350 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018438792685829513, - "loss": 3.2536, - "step": 36355 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018431202025349783, - "loss": 3.1973, - "step": 36360 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018423612234784364, - "loss": 3.2563, - "step": 36365 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018416023314703973, - "loss": 3.2235, - "step": 36370 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018408435265679268, - "loss": 3.3684, - "step": 36375 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001840084808828082, - "loss": 3.2167, - "step": 36380 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001839326178307916, - "loss": 3.2903, - "step": 36385 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018385676350644723, - "loss": 3.0828, - "step": 36390 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001837809179154791, - "loss": 3.3216, - "step": 36395 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001837050810635902, - "loss": 3.2892, - "step": 36400 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018362925295648322, - "loss": 3.1734, - "step": 36405 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018355343359986018, - "loss": 3.1735, - "step": 36410 - }, - { - "epoch": 0.63, - "learning_rate": 0.000183477622999422, - "loss": 3.1576, - "step": 36415 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018340182116086947, - "loss": 3.2127, - "step": 36420 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018332602808990238, - "loss": 3.1634, - "step": 36425 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018325024379222006, - "loss": 3.3005, - "step": 36430 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018317446827352115, - "loss": 3.2287, - "step": 36435 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018309870153950347, - "loss": 3.23, - "step": 36440 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018302294359586426, - "loss": 3.208, - "step": 36445 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001829471944483002, - "loss": 3.2683, - "step": 36450 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018287145410250723, - "loss": 3.1638, - "step": 36455 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018279572256418077, - "loss": 3.2345, - "step": 36460 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018271999983901515, - "loss": 3.2936, - "step": 36465 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018264428593270452, - "loss": 3.2147, - "step": 36470 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001825685808509422, - "loss": 3.1711, - "step": 36475 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018249288459942073, - "loss": 3.1448, - "step": 36480 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001824171971838322, - "loss": 3.2079, - "step": 36485 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018234151860986773, - "loss": 3.1737, - "step": 36490 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018226584888321812, - "loss": 3.2266, - "step": 36495 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018219018800957334, - "loss": 3.154, - "step": 36500 - }, - { - "epoch": 0.63, - "eval_loss": 3.2557082176208496, - "eval_runtime": 151.364, - "eval_samples_per_second": 12.163, - "eval_steps_per_second": 0.766, - "step": 36500 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018211453599462256, - "loss": 3.2151, - "step": 36505 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018203889284405467, - "loss": 3.3157, - "step": 36510 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018196325856355736, - "loss": 3.1872, - "step": 36515 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018188763315881817, - "loss": 3.2263, - "step": 36520 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018181201663552353, - "loss": 3.2157, - "step": 36525 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018173640899935958, - "loss": 3.2489, - "step": 36530 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001816608102560117, - "loss": 3.2881, - "step": 36535 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001815852204111643, - "loss": 3.2426, - "step": 36540 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018150963947050145, - "loss": 3.2455, - "step": 36545 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018143406743970642, - "loss": 3.3231, - "step": 36550 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018135850432446197, - "loss": 3.3465, - "step": 36555 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018128295013044993, - "loss": 3.2252, - "step": 36560 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001812074048633517, - "loss": 3.2743, - "step": 36565 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018113186852884773, - "loss": 3.2134, - "step": 36570 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018105634113261805, - "loss": 3.1525, - "step": 36575 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018098082268034197, - "loss": 3.2405, - "step": 36580 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018090531317769805, - "loss": 3.1579, - "step": 36585 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001808298126303643, - "loss": 3.3345, - "step": 36590 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018075432104401782, - "loss": 3.2023, - "step": 36595 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018067883842433534, - "loss": 3.3086, - "step": 36600 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018060336477699262, - "loss": 3.2832, - "step": 36605 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018052790010766496, - "loss": 3.311, - "step": 36610 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018045244442202707, - "loss": 3.263, - "step": 36615 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018037699772575262, - "loss": 3.2083, - "step": 36620 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001803015600245148, - "loss": 3.3219, - "step": 36625 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018022613132398624, - "loss": 3.271, - "step": 36630 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018015071162983884, - "loss": 3.1227, - "step": 36635 - }, - { - "epoch": 0.63, - "learning_rate": 0.00018007530094774375, - "loss": 3.2264, - "step": 36640 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017999989928337132, - "loss": 3.2169, - "step": 36645 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017992450664239149, - "loss": 3.1578, - "step": 36650 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001798491230304734, - "loss": 3.1818, - "step": 36655 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017977374845328547, - "loss": 3.1602, - "step": 36660 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001796983829164956, - "loss": 3.1913, - "step": 36665 - }, - { - "epoch": 0.63, - "learning_rate": 0.0001796230264257707, - "loss": 3.3033, - "step": 36670 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017954767898677726, - "loss": 3.2693, - "step": 36675 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017947234060518115, - "loss": 3.2891, - "step": 36680 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017939701128664727, - "loss": 3.2005, - "step": 36685 - }, - { - "epoch": 0.63, - "learning_rate": 0.00017932169103684016, - "loss": 3.1168, - "step": 36690 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001792463798614233, - "loss": 3.2606, - "step": 36695 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001791710777660599, - "loss": 3.1485, - "step": 36700 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017909578475641213, - "loss": 3.0826, - "step": 36705 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017902050083814174, - "loss": 3.2234, - "step": 36710 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017894522601690974, - "loss": 3.36, - "step": 36715 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017886996029837634, - "loss": 3.2038, - "step": 36720 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001787947036882011, - "loss": 3.1815, - "step": 36725 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017871945619204294, - "loss": 3.265, - "step": 36730 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001786442178155602, - "loss": 3.2302, - "step": 36735 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017856898856441036, - "loss": 3.1937, - "step": 36740 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017849376844425024, - "loss": 3.1804, - "step": 36745 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017841855746073595, - "loss": 3.2216, - "step": 36750 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017834335561952306, - "loss": 3.3077, - "step": 36755 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001782681629262664, - "loss": 3.1248, - "step": 36760 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001781929793866201, - "loss": 3.1452, - "step": 36765 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017811780500623733, - "loss": 3.2719, - "step": 36770 - }, - { - "epoch": 0.64, - "learning_rate": 0.000178042639790771, - "loss": 3.2455, - "step": 36775 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017796748374587323, - "loss": 3.1196, - "step": 36780 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001778923368771952, - "loss": 3.329, - "step": 36785 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001778171991903876, - "loss": 3.1396, - "step": 36790 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017774207069110064, - "loss": 3.2559, - "step": 36795 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017766695138498332, - "loss": 3.2072, - "step": 36800 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017759184127768424, - "loss": 3.3362, - "step": 36805 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017751674037485136, - "loss": 3.2532, - "step": 36810 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017744164868213193, - "loss": 3.2233, - "step": 36815 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017736656620517246, - "loss": 3.2606, - "step": 36820 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001772914929496187, - "loss": 3.1516, - "step": 36825 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017721642892111573, - "loss": 3.1829, - "step": 36830 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017714137412530806, - "loss": 3.2415, - "step": 36835 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017706632856783952, - "loss": 3.2393, - "step": 36840 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017699129225435303, - "loss": 3.192, - "step": 36845 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017691626519049085, - "loss": 3.3051, - "step": 36850 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017684124738189476, - "loss": 3.2575, - "step": 36855 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017676623883420574, - "loss": 3.1901, - "step": 36860 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017669123955306394, - "loss": 3.1218, - "step": 36865 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017661624954410905, - "loss": 3.2743, - "step": 36870 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001765412688129798, - "loss": 3.2173, - "step": 36875 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001764662973653145, - "loss": 3.1673, - "step": 36880 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017639133520675043, - "loss": 3.3145, - "step": 36885 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017631638234292448, - "loss": 3.2983, - "step": 36890 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017624143877947282, - "loss": 3.1276, - "step": 36895 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017616650452203066, - "loss": 3.0952, - "step": 36900 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017609157957623265, - "loss": 3.1819, - "step": 36905 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017601666394771287, - "loss": 3.13, - "step": 36910 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017594175764210453, - "loss": 3.1295, - "step": 36915 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017586686066504035, - "loss": 3.2718, - "step": 36920 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017579197302215204, - "loss": 3.3466, - "step": 36925 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001757170947190707, - "loss": 3.1818, - "step": 36930 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017564222576142694, - "loss": 3.2946, - "step": 36935 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001755673661548505, - "loss": 3.2415, - "step": 36940 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001754925159049705, - "loss": 3.2438, - "step": 36945 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001754176750174151, - "loss": 3.2137, - "step": 36950 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017534284349781203, - "loss": 3.1935, - "step": 36955 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017526802135178836, - "loss": 3.281, - "step": 36960 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017519320858497018, - "loss": 3.185, - "step": 36965 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001751184052029832, - "loss": 3.2558, - "step": 36970 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001750436112114521, - "loss": 3.3523, - "step": 36975 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017496882661600095, - "loss": 3.2183, - "step": 36980 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017489405142225332, - "loss": 3.308, - "step": 36985 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001748192856358319, - "loss": 3.2556, - "step": 36990 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017474452926235864, - "loss": 3.2036, - "step": 36995 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017466978230745486, - "loss": 3.2368, - "step": 37000 - }, - { - "epoch": 0.64, - "eval_loss": 3.2513468265533447, - "eval_runtime": 149.7793, - "eval_samples_per_second": 12.291, - "eval_steps_per_second": 0.774, - "step": 37000 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017459504477674104, - "loss": 3.2079, - "step": 37005 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017452031667583728, - "loss": 3.0015, - "step": 37010 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017444559801036262, - "loss": 3.2196, - "step": 37015 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017437088878593542, - "loss": 3.2114, - "step": 37020 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001742961890081738, - "loss": 3.3074, - "step": 37025 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017422149868269427, - "loss": 3.1725, - "step": 37030 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017414681781511356, - "loss": 3.2638, - "step": 37035 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017407214641104715, - "loss": 3.1967, - "step": 37040 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017399748447610987, - "loss": 3.1839, - "step": 37045 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017392283201591625, - "loss": 3.251, - "step": 37050 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001738481890360794, - "loss": 3.1605, - "step": 37055 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001737735555422121, - "loss": 3.2344, - "step": 37060 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017369893153992662, - "loss": 3.1954, - "step": 37065 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017362431703483425, - "loss": 3.2549, - "step": 37070 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017354971203254553, - "loss": 3.2404, - "step": 37075 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017347511653867045, - "loss": 3.249, - "step": 37080 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017340053055881803, - "loss": 3.2248, - "step": 37085 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017332595409859706, - "loss": 3.2577, - "step": 37090 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001732513871636151, - "loss": 3.249, - "step": 37095 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017317682975947926, - "loss": 3.2418, - "step": 37100 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001731022818917959, - "loss": 3.1526, - "step": 37105 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017302774356617052, - "loss": 3.2873, - "step": 37110 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017295321478820805, - "loss": 3.1383, - "step": 37115 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017287869556351283, - "loss": 3.2247, - "step": 37120 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001728041858976882, - "loss": 3.1668, - "step": 37125 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017272968579633695, - "loss": 3.2901, - "step": 37130 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017265519526506108, - "loss": 3.2611, - "step": 37135 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017258071430946174, - "loss": 3.1702, - "step": 37140 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001725062429351398, - "loss": 3.2486, - "step": 37145 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017243178114769505, - "loss": 3.2305, - "step": 37150 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017235732895272648, - "loss": 3.2179, - "step": 37155 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001722828863558327, - "loss": 3.2362, - "step": 37160 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017220845336261118, - "loss": 3.2262, - "step": 37165 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017213402997865915, - "loss": 3.1945, - "step": 37170 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001720596162095729, - "loss": 3.2624, - "step": 37175 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001719852120609476, - "loss": 3.0769, - "step": 37180 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017191081753837836, - "loss": 3.1758, - "step": 37185 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017183643264745925, - "loss": 3.2337, - "step": 37190 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017176205739378344, - "loss": 3.1392, - "step": 37195 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001716876917829439, - "loss": 3.2752, - "step": 37200 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017161333582053217, - "loss": 3.3017, - "step": 37205 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001715389895121397, - "loss": 3.1933, - "step": 37210 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017146465286335688, - "loss": 3.2897, - "step": 37215 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017139032587977333, - "loss": 3.2704, - "step": 37220 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017131600856697834, - "loss": 3.221, - "step": 37225 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017124170093055983, - "loss": 3.2712, - "step": 37230 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017116740297610565, - "loss": 3.1784, - "step": 37235 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001710931147092025, - "loss": 3.1287, - "step": 37240 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017101883613543636, - "loss": 3.3075, - "step": 37245 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017094456726039284, - "loss": 3.2108, - "step": 37250 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017087030808965654, - "loss": 3.167, - "step": 37255 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017079605862881114, - "loss": 3.0637, - "step": 37260 - }, - { - "epoch": 0.64, - "learning_rate": 0.00017072181888344004, - "loss": 3.2903, - "step": 37265 - }, - { - "epoch": 0.64, - "learning_rate": 0.0001706475888591256, - "loss": 3.3106, - "step": 37270 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001705733685614495, - "loss": 3.2914, - "step": 37275 - }, - { - "epoch": 0.65, - "learning_rate": 0.00017049915799599298, - "loss": 3.2675, - "step": 37280 - }, - { - "epoch": 0.65, - "learning_rate": 0.00017042495716833588, - "loss": 3.2125, - "step": 37285 - }, - { - "epoch": 0.65, - "learning_rate": 0.00017035076608405803, - "loss": 3.197, - "step": 37290 - }, - { - "epoch": 0.65, - "learning_rate": 0.00017027658474873817, - "loss": 3.1445, - "step": 37295 - }, - { - "epoch": 0.65, - "learning_rate": 0.00017020241316795416, - "loss": 3.2512, - "step": 37300 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001701282513472837, - "loss": 3.2431, - "step": 37305 - }, - { - "epoch": 0.65, - "learning_rate": 0.00017005409929230297, - "loss": 3.1504, - "step": 37310 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016997995700858808, - "loss": 3.185, - "step": 37315 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016990582450171412, - "loss": 3.0925, - "step": 37320 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016983170177725532, - "loss": 3.0949, - "step": 37325 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016975758884078566, - "loss": 3.2816, - "step": 37330 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016968348569787773, - "loss": 3.3073, - "step": 37335 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001696093923541037, - "loss": 3.3084, - "step": 37340 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001695353088150352, - "loss": 3.1617, - "step": 37345 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016946123508624288, - "loss": 3.2592, - "step": 37350 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001693871711732967, - "loss": 3.2328, - "step": 37355 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016931311708176587, - "loss": 3.217, - "step": 37360 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001692390728172187, - "loss": 3.2719, - "step": 37365 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016916503838522331, - "loss": 3.069, - "step": 37370 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001690910137913465, - "loss": 3.3553, - "step": 37375 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016901699904115454, - "loss": 3.1695, - "step": 37380 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016894299414021294, - "loss": 3.2769, - "step": 37385 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016886899909408643, - "loss": 3.126, - "step": 37390 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016879501390833928, - "loss": 3.265, - "step": 37395 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001687210385885347, - "loss": 3.2026, - "step": 37400 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016864707314023517, - "loss": 3.2575, - "step": 37405 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001685731175690026, - "loss": 3.2137, - "step": 37410 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016849917188039803, - "loss": 3.2626, - "step": 37415 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001684252360799817, - "loss": 3.2544, - "step": 37420 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016835131017331337, - "loss": 3.3139, - "step": 37425 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016827739416595187, - "loss": 3.2438, - "step": 37430 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016820348806345525, - "loss": 3.1817, - "step": 37435 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001681295918713809, - "loss": 3.2638, - "step": 37440 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016805570559528526, - "loss": 3.2733, - "step": 37445 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001679818292407245, - "loss": 3.2396, - "step": 37450 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001679079628132536, - "loss": 3.2589, - "step": 37455 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016783410631842691, - "loss": 3.2473, - "step": 37460 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001677602597617981, - "loss": 3.19, - "step": 37465 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001676864231489199, - "loss": 3.2841, - "step": 37470 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016761259648534477, - "loss": 3.3538, - "step": 37475 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016753877977662384, - "loss": 3.3771, - "step": 37480 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001674649730283079, - "loss": 3.1082, - "step": 37485 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016739117624594665, - "loss": 3.1594, - "step": 37490 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001673173894350894, - "loss": 3.2678, - "step": 37495 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016724361260128434, - "loss": 3.2161, - "step": 37500 - }, - { - "epoch": 0.65, - "eval_loss": 3.246263027191162, - "eval_runtime": 150.2834, - "eval_samples_per_second": 12.25, - "eval_steps_per_second": 0.772, - "step": 37500 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001671698457500794, - "loss": 3.1662, - "step": 37505 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016709608888702124, - "loss": 3.2584, - "step": 37510 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016702234201765613, - "loss": 3.2162, - "step": 37515 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016694860514752938, - "loss": 3.1176, - "step": 37520 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016687487828218557, - "loss": 3.1931, - "step": 37525 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016680116142716873, - "loss": 3.2113, - "step": 37530 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016672745458802193, - "loss": 3.2477, - "step": 37535 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016665375777028755, - "loss": 3.1498, - "step": 37540 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016658007097950715, - "loss": 3.3016, - "step": 37545 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016650639422122158, - "loss": 3.194, - "step": 37550 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001664327275009711, - "loss": 3.137, - "step": 37555 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016635907082429514, - "loss": 3.2968, - "step": 37560 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001662854241967319, - "loss": 3.2957, - "step": 37565 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016621178762381964, - "loss": 3.2532, - "step": 37570 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016613816111109525, - "loss": 3.1561, - "step": 37575 - }, - { - "epoch": 0.65, - "learning_rate": 0.000166064544664095, - "loss": 3.1695, - "step": 37580 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016599093828835484, - "loss": 3.1958, - "step": 37585 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016591734198940906, - "loss": 3.18, - "step": 37590 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016584375577279219, - "loss": 3.1963, - "step": 37595 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016577017964403728, - "loss": 3.1112, - "step": 37600 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016569661360867687, - "loss": 3.1904, - "step": 37605 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016562305767224304, - "loss": 3.2197, - "step": 37610 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016554951184026646, - "loss": 3.2805, - "step": 37615 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016547597611827746, - "loss": 3.237, - "step": 37620 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016540245051180572, - "loss": 3.1968, - "step": 37625 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016532893502637993, - "loss": 3.2983, - "step": 37630 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016525542966752804, - "loss": 3.2977, - "step": 37635 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016518193444077728, - "loss": 3.2517, - "step": 37640 - }, - { - "epoch": 0.65, - "learning_rate": 0.000165108449351654, - "loss": 3.1764, - "step": 37645 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001650349744056841, - "loss": 3.1407, - "step": 37650 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001649615096083925, - "loss": 3.3731, - "step": 37655 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016488805496530328, - "loss": 3.2043, - "step": 37660 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001648146104819399, - "loss": 3.214, - "step": 37665 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001647411761638249, - "loss": 3.1209, - "step": 37670 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016466775201648032, - "loss": 3.2196, - "step": 37675 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016459433804542727, - "loss": 3.3123, - "step": 37680 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016452093425618606, - "loss": 3.245, - "step": 37685 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016444754065427625, - "loss": 3.1629, - "step": 37690 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016437415724521672, - "loss": 3.2326, - "step": 37695 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016430078403452535, - "loss": 3.1992, - "step": 37700 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016422742102771969, - "loss": 3.2132, - "step": 37705 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016415406823031616, - "loss": 3.179, - "step": 37710 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016408072564783047, - "loss": 3.2493, - "step": 37715 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016400739328577767, - "loss": 3.203, - "step": 37720 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016393407114967184, - "loss": 3.1359, - "step": 37725 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016386075924502657, - "loss": 3.2161, - "step": 37730 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001637874575773546, - "loss": 3.1806, - "step": 37735 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016371416615216765, - "loss": 3.2406, - "step": 37740 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016364088497497706, - "loss": 3.1397, - "step": 37745 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016356761405129287, - "loss": 3.1923, - "step": 37750 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001634943533866251, - "loss": 3.3604, - "step": 37755 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016342110298648232, - "loss": 3.2568, - "step": 37760 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016334786285637264, - "loss": 3.2019, - "step": 37765 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016327463300180335, - "loss": 3.1786, - "step": 37770 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016320141342828095, - "loss": 3.0524, - "step": 37775 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016312820414131107, - "loss": 3.3086, - "step": 37780 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016305500514639888, - "loss": 3.1939, - "step": 37785 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016298181644904854, - "loss": 3.307, - "step": 37790 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016290863805476331, - "loss": 3.2686, - "step": 37795 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016283546996904595, - "loss": 3.1524, - "step": 37800 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001627623121973982, - "loss": 3.1401, - "step": 37805 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001626891647453213, - "loss": 3.1872, - "step": 37810 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016261602761831554, - "loss": 3.4207, - "step": 37815 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016254290082188044, - "loss": 3.2811, - "step": 37820 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016246978436151472, - "loss": 3.2386, - "step": 37825 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016239667824271626, - "loss": 3.2014, - "step": 37830 - }, - { - "epoch": 0.65, - "learning_rate": 0.0001623235824709825, - "loss": 3.1844, - "step": 37835 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016225049705180988, - "loss": 3.1639, - "step": 37840 - }, - { - "epoch": 0.65, - "learning_rate": 0.00016217742199069375, - "loss": 3.2266, - "step": 37845 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016210435729312918, - "loss": 3.1546, - "step": 37850 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001620313029646103, - "loss": 3.2589, - "step": 37855 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016195825901063024, - "loss": 3.1717, - "step": 37860 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016188522543668187, - "loss": 3.1791, - "step": 37865 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016181220224825647, - "loss": 3.2002, - "step": 37870 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016173918945084538, - "loss": 3.0784, - "step": 37875 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016166618704993867, - "loss": 3.2606, - "step": 37880 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016159319505102565, - "loss": 3.2025, - "step": 37885 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016152021345959528, - "loss": 3.119, - "step": 37890 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016144724228113488, - "loss": 3.3311, - "step": 37895 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016137428152113197, - "loss": 3.2303, - "step": 37900 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001613013311850726, - "loss": 3.306, - "step": 37905 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016122839127844218, - "loss": 3.2077, - "step": 37910 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001611554618067258, - "loss": 3.1467, - "step": 37915 - }, - { - "epoch": 0.66, - "learning_rate": 0.000161082542775407, - "loss": 3.2522, - "step": 37920 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001610096341899689, - "loss": 3.2779, - "step": 37925 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016093673605589416, - "loss": 3.1941, - "step": 37930 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016086384837866414, - "loss": 3.1619, - "step": 37935 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016079097116375958, - "loss": 3.2503, - "step": 37940 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016071810441666077, - "loss": 3.2323, - "step": 37945 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016064524814284644, - "loss": 3.1909, - "step": 37950 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016057240234779543, - "loss": 3.1922, - "step": 37955 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001604995670369852, - "loss": 3.2015, - "step": 37960 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016042674221589242, - "loss": 3.1244, - "step": 37965 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016035392788999364, - "loss": 3.2906, - "step": 37970 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016028112406476353, - "loss": 3.2155, - "step": 37975 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016020833074567695, - "loss": 3.2215, - "step": 37980 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001601355479382075, - "loss": 3.2019, - "step": 37985 - }, - { - "epoch": 0.66, - "learning_rate": 0.00016006277564782796, - "loss": 3.1993, - "step": 37990 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015999001388001074, - "loss": 3.3097, - "step": 37995 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015991726264022677, - "loss": 3.1445, - "step": 38000 - }, - { - "epoch": 0.66, - "eval_loss": 3.2400145530700684, - "eval_runtime": 149.8729, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 38000 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001598445219339467, - "loss": 3.2372, - "step": 38005 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015977179176664035, - "loss": 3.2157, - "step": 38010 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015969907214377662, - "loss": 3.2457, - "step": 38015 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015962636307082363, - "loss": 3.2293, - "step": 38020 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015955366455324874, - "loss": 3.1265, - "step": 38025 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001594809765965184, - "loss": 3.2626, - "step": 38030 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015940829920609858, - "loss": 3.2296, - "step": 38035 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015933563238745412, - "loss": 3.2178, - "step": 38040 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015926297614604924, - "loss": 3.2407, - "step": 38045 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015919033048734726, - "loss": 3.2555, - "step": 38050 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015911769541681066, - "loss": 3.2065, - "step": 38055 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015904507093990152, - "loss": 3.2034, - "step": 38060 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015897245706208063, - "loss": 3.3155, - "step": 38065 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015889985378880818, - "loss": 3.2745, - "step": 38070 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015882726112554364, - "loss": 3.2874, - "step": 38075 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015875467907774558, - "loss": 3.1474, - "step": 38080 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015868210765087167, - "loss": 3.2925, - "step": 38085 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015860954685037912, - "loss": 3.1178, - "step": 38090 - }, - { - "epoch": 0.66, - "learning_rate": 0.000158536996681724, - "loss": 3.2575, - "step": 38095 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001584644571503618, - "loss": 3.2266, - "step": 38100 - }, - { - "epoch": 0.66, - "learning_rate": 0.000158391928261747, - "loss": 3.0909, - "step": 38105 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015831941002133343, - "loss": 3.1911, - "step": 38110 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001582469024345742, - "loss": 3.1309, - "step": 38115 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015817440550692152, - "loss": 3.1826, - "step": 38120 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001581019192438265, - "loss": 3.212, - "step": 38125 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015802944365074005, - "loss": 3.3265, - "step": 38130 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015795697873311184, - "loss": 3.1416, - "step": 38135 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015788452449639077, - "loss": 3.0383, - "step": 38140 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015781208094602537, - "loss": 3.159, - "step": 38145 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001577396480874625, - "loss": 3.2361, - "step": 38150 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001576672259261492, - "loss": 3.2279, - "step": 38155 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015759481446753098, - "loss": 3.248, - "step": 38160 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015752241371705286, - "loss": 3.1824, - "step": 38165 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015745002368015913, - "loss": 3.1641, - "step": 38170 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015737764436229306, - "loss": 3.1412, - "step": 38175 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001573052757688972, - "loss": 3.2713, - "step": 38180 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015723291790541334, - "loss": 3.226, - "step": 38185 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015716057077728223, - "loss": 3.3351, - "step": 38190 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001570882343899443, - "loss": 3.2517, - "step": 38195 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015701590874883883, - "loss": 3.1853, - "step": 38200 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015694359385940403, - "loss": 3.1761, - "step": 38205 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001568712897270779, - "loss": 3.1338, - "step": 38210 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001567989963572973, - "loss": 3.2637, - "step": 38215 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015672671375549814, - "loss": 3.2493, - "step": 38220 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015665444192711604, - "loss": 3.2718, - "step": 38225 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015658218087758507, - "loss": 3.2829, - "step": 38230 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015650993061233924, - "loss": 3.3045, - "step": 38235 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015643769113681124, - "loss": 3.2175, - "step": 38240 - }, - { - "epoch": 0.66, - "learning_rate": 0.000156365462456433, - "loss": 3.2631, - "step": 38245 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015629324457663605, - "loss": 3.3236, - "step": 38250 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015622103750285047, - "loss": 3.2581, - "step": 38255 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015614884124050607, - "loss": 3.287, - "step": 38260 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015607665579503164, - "loss": 3.3278, - "step": 38265 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015600448117185492, - "loss": 3.1982, - "step": 38270 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015593231737640352, - "loss": 3.2156, - "step": 38275 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015586016441410341, - "loss": 3.2382, - "step": 38280 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015578802229038007, - "loss": 3.2535, - "step": 38285 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015571589101065855, - "loss": 3.3024, - "step": 38290 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015564377058036258, - "loss": 3.1432, - "step": 38295 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015557166100491518, - "loss": 3.1777, - "step": 38300 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015549956228973875, - "loss": 3.2004, - "step": 38305 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015542747444025455, - "loss": 3.0625, - "step": 38310 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015535539746188348, - "loss": 3.1938, - "step": 38315 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015528333136004523, - "loss": 3.221, - "step": 38320 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015521127614015877, - "loss": 3.2017, - "step": 38325 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015513923180764233, - "loss": 3.2288, - "step": 38330 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015506719836791313, - "loss": 3.2243, - "step": 38335 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015499517582638798, - "loss": 3.232, - "step": 38340 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015492316418848248, - "loss": 3.1439, - "step": 38345 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001548511634596115, - "loss": 3.1259, - "step": 38350 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015477917364518915, - "loss": 3.1856, - "step": 38355 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001547071947506287, - "loss": 3.2, - "step": 38360 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015463522678134248, - "loss": 3.1343, - "step": 38365 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015456326974274233, - "loss": 3.2869, - "step": 38370 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015449132364023897, - "loss": 3.0858, - "step": 38375 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015441938847924233, - "loss": 3.1788, - "step": 38380 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001543474642651616, - "loss": 3.2329, - "step": 38385 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015427555100340498, - "loss": 3.196, - "step": 38390 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001542036486993802, - "loss": 3.1304, - "step": 38395 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015413175735849388, - "loss": 3.2239, - "step": 38400 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015405987698615184, - "loss": 3.0827, - "step": 38405 - }, - { - "epoch": 0.66, - "learning_rate": 0.0001539880075877591, - "loss": 3.2181, - "step": 38410 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015391614916871978, - "loss": 3.2241, - "step": 38415 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015384430173443747, - "loss": 3.2218, - "step": 38420 - }, - { - "epoch": 0.66, - "learning_rate": 0.00015377246529031465, - "loss": 3.098, - "step": 38425 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015370063984175304, - "loss": 3.2373, - "step": 38430 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015362882539415352, - "loss": 3.2509, - "step": 38435 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001535570219529162, - "loss": 3.1837, - "step": 38440 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001534852295234402, - "loss": 3.2769, - "step": 38445 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001534134481111242, - "loss": 3.1648, - "step": 38450 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015334167772136566, - "loss": 3.1912, - "step": 38455 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015326991835956132, - "loss": 3.1258, - "step": 38460 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015319817003110715, - "loss": 3.2684, - "step": 38465 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015312643274139812, - "loss": 3.2462, - "step": 38470 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001530547064958288, - "loss": 3.1875, - "step": 38475 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015298299129979238, - "loss": 3.2456, - "step": 38480 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015291128715868164, - "loss": 3.1945, - "step": 38485 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015283959407788825, - "loss": 3.2784, - "step": 38490 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015276791206280307, - "loss": 3.0896, - "step": 38495 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001526962411188165, - "loss": 3.1892, - "step": 38500 - }, - { - "epoch": 0.67, - "eval_loss": 3.235760450363159, - "eval_runtime": 149.9771, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 38500 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001526245812513178, - "loss": 3.1669, - "step": 38505 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015255293246569504, - "loss": 3.2019, - "step": 38510 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015248129476733626, - "loss": 3.2363, - "step": 38515 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015240966816162807, - "loss": 3.2336, - "step": 38520 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001523380526539563, - "loss": 3.2384, - "step": 38525 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001522664482497065, - "loss": 3.1823, - "step": 38530 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015219485495426242, - "loss": 3.2383, - "step": 38535 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015212327277300784, - "loss": 3.2228, - "step": 38540 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015205170171132536, - "loss": 3.0597, - "step": 38545 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001519801417745965, - "loss": 3.2417, - "step": 38550 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015190859296820264, - "loss": 3.2426, - "step": 38555 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001518370552975234, - "loss": 3.1934, - "step": 38560 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015176552876793847, - "loss": 3.2366, - "step": 38565 - }, - { - "epoch": 0.67, - "learning_rate": 0.000151694013384826, - "loss": 3.1862, - "step": 38570 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015162250915356357, - "loss": 3.2587, - "step": 38575 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015155101607952827, - "loss": 3.1882, - "step": 38580 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015147953416809565, - "loss": 3.2082, - "step": 38585 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015140806342464077, - "loss": 3.204, - "step": 38590 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015133660385453814, - "loss": 3.2401, - "step": 38595 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015126515546316102, - "loss": 3.1105, - "step": 38600 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015119371825588192, - "loss": 3.2315, - "step": 38605 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015112229223807262, - "loss": 3.2599, - "step": 38610 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015105087741510386, - "loss": 3.1626, - "step": 38615 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015097947379234583, - "loss": 3.0691, - "step": 38620 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015090808137516767, - "loss": 3.1392, - "step": 38625 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001508367001689377, - "loss": 3.2198, - "step": 38630 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015076533017902347, - "loss": 3.1879, - "step": 38635 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015069397141079154, - "loss": 3.228, - "step": 38640 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015062262386960772, - "loss": 3.2194, - "step": 38645 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001505512875608371, - "loss": 3.2814, - "step": 38650 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015047996248984376, - "loss": 3.1777, - "step": 38655 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015040864866199097, - "loss": 3.221, - "step": 38660 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015033734608264115, - "loss": 3.2044, - "step": 38665 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015026605475715581, - "loss": 3.2661, - "step": 38670 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015019477469089586, - "loss": 3.2046, - "step": 38675 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001501235058892211, - "loss": 3.1087, - "step": 38680 - }, - { - "epoch": 0.67, - "learning_rate": 0.00015005224835749063, - "loss": 3.2581, - "step": 38685 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014998100210106258, - "loss": 3.1716, - "step": 38690 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014990976712529424, - "loss": 3.1427, - "step": 38695 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014983854343554232, - "loss": 3.1025, - "step": 38700 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001497673310371624, - "loss": 3.1843, - "step": 38705 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001496961299355092, - "loss": 3.2923, - "step": 38710 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001496249401359368, - "loss": 3.2623, - "step": 38715 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014955376164379818, - "loss": 3.1085, - "step": 38720 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001494825944644456, - "loss": 3.2231, - "step": 38725 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001494114386032306, - "loss": 3.2339, - "step": 38730 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014934029406550367, - "loss": 3.1435, - "step": 38735 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001492691608566145, - "loss": 3.2924, - "step": 38740 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014919803898191198, - "loss": 3.3259, - "step": 38745 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014912692844674393, - "loss": 3.2163, - "step": 38750 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001490558292564578, - "loss": 3.1284, - "step": 38755 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001489847414163997, - "loss": 3.2856, - "step": 38760 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001489136649319151, - "loss": 3.2836, - "step": 38765 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014884259980834866, - "loss": 3.1405, - "step": 38770 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001487715460510439, - "loss": 3.1253, - "step": 38775 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001487005036653439, - "loss": 3.2167, - "step": 38780 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001486294726565908, - "loss": 3.1719, - "step": 38785 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014855845303012535, - "loss": 3.1742, - "step": 38790 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001484874447912882, - "loss": 3.1847, - "step": 38795 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014841644794541872, - "loss": 3.2533, - "step": 38800 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014834546249785542, - "loss": 3.1358, - "step": 38805 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014827448845393634, - "loss": 3.2148, - "step": 38810 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014820352581899787, - "loss": 3.1691, - "step": 38815 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014813257459837649, - "loss": 3.1114, - "step": 38820 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014806163479740714, - "loss": 3.1567, - "step": 38825 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014799070642142412, - "loss": 3.189, - "step": 38830 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014791978947576115, - "loss": 3.1744, - "step": 38835 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014784888396575038, - "loss": 3.2016, - "step": 38840 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014777798989672387, - "loss": 3.1499, - "step": 38845 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014770710727401241, - "loss": 3.2737, - "step": 38850 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014763623610294588, - "loss": 3.2374, - "step": 38855 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014756537638885376, - "loss": 3.2429, - "step": 38860 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014749452813706405, - "loss": 3.3057, - "step": 38865 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014742369135290409, - "loss": 3.2106, - "step": 38870 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014735286604170074, - "loss": 3.1396, - "step": 38875 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014728205220877953, - "loss": 3.3025, - "step": 38880 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014721124985946521, - "loss": 3.1966, - "step": 38885 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014714045899908215, - "loss": 3.229, - "step": 38890 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001470696796329529, - "loss": 3.2342, - "step": 38895 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001469989117664001, - "loss": 3.2281, - "step": 38900 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014692815540474506, - "loss": 3.1531, - "step": 38905 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014685741055330809, - "loss": 3.2777, - "step": 38910 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014678667721740925, - "loss": 3.0968, - "step": 38915 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014671595540236687, - "loss": 3.2377, - "step": 38920 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014664524511349918, - "loss": 3.2756, - "step": 38925 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014657454635612314, - "loss": 3.2936, - "step": 38930 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014650385913555475, - "loss": 3.1701, - "step": 38935 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014643318345710976, - "loss": 3.2626, - "step": 38940 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014636251932610226, - "loss": 3.1047, - "step": 38945 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001462918667478458, - "loss": 3.2449, - "step": 38950 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014622122572765335, - "loss": 3.1803, - "step": 38955 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014615059627083658, - "loss": 3.199, - "step": 38960 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014607997838270652, - "loss": 3.2114, - "step": 38965 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014600937206857328, - "loss": 3.1264, - "step": 38970 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014593877733374595, - "loss": 3.1626, - "step": 38975 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001458681941835331, - "loss": 3.1494, - "step": 38980 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014579762262324216, - "loss": 3.1752, - "step": 38985 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014572706265817976, - "loss": 3.2887, - "step": 38990 - }, - { - "epoch": 0.67, - "learning_rate": 0.00014565651429365153, - "loss": 3.1689, - "step": 38995 - }, - { - "epoch": 0.67, - "learning_rate": 0.0001455859775349624, - "loss": 3.1576, - "step": 39000 - }, - { - "epoch": 0.67, - "eval_loss": 3.232856512069702, - "eval_runtime": 149.8711, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 39000 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014551545238741643, - "loss": 3.1477, - "step": 39005 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014544493885631677, - "loss": 3.1183, - "step": 39010 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001453744369469656, - "loss": 3.2491, - "step": 39015 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014530394666466436, - "loss": 3.2484, - "step": 39020 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014523346801471347, - "loss": 3.1706, - "step": 39025 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014516300100241256, - "loss": 3.2258, - "step": 39030 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014509254563306053, - "loss": 3.212, - "step": 39035 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014502210191195522, - "loss": 3.1339, - "step": 39040 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014495166984439356, - "loss": 3.0903, - "step": 39045 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014488124943567174, - "loss": 3.2413, - "step": 39050 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014481084069108488, - "loss": 3.0555, - "step": 39055 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014474044361592755, - "loss": 3.1508, - "step": 39060 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001446700582154932, - "loss": 3.2852, - "step": 39065 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001445996844950744, - "loss": 3.1775, - "step": 39070 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001445293224599629, - "loss": 3.1098, - "step": 39075 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014445897211544948, - "loss": 3.2777, - "step": 39080 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014438863346682437, - "loss": 3.2472, - "step": 39085 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014431830651937658, - "loss": 3.1457, - "step": 39090 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001442479912783941, - "loss": 3.1604, - "step": 39095 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014417768774916456, - "loss": 3.2105, - "step": 39100 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014410739593697432, - "loss": 3.2452, - "step": 39105 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014403711584710886, - "loss": 3.1147, - "step": 39110 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014396684748485308, - "loss": 3.138, - "step": 39115 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014389659085549075, - "loss": 3.1713, - "step": 39120 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014382634596430476, - "loss": 3.2346, - "step": 39125 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014375611281657716, - "loss": 3.1103, - "step": 39130 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014368589141758903, - "loss": 3.2913, - "step": 39135 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014361568177262088, - "loss": 3.0587, - "step": 39140 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014354548388695201, - "loss": 3.183, - "step": 39145 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014347529776586094, - "loss": 3.2254, - "step": 39150 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001434051234146253, - "loss": 3.2179, - "step": 39155 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014333496083852174, - "loss": 3.1301, - "step": 39160 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014326481004282632, - "loss": 3.0906, - "step": 39165 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014319467103281406, - "loss": 3.0157, - "step": 39170 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001431245438137587, - "loss": 3.2439, - "step": 39175 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014305442839093375, - "loss": 3.1518, - "step": 39180 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014298432476961148, - "loss": 3.1854, - "step": 39185 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014291423295506318, - "loss": 3.2038, - "step": 39190 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014284415295255973, - "loss": 3.226, - "step": 39195 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014277408476737034, - "loss": 3.2791, - "step": 39200 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014270402840476412, - "loss": 3.3441, - "step": 39205 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014263398387000885, - "loss": 3.131, - "step": 39210 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014256395116837137, - "loss": 3.2233, - "step": 39215 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014249393030511814, - "loss": 3.1594, - "step": 39220 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014242392128551405, - "loss": 3.2299, - "step": 39225 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001423539241148234, - "loss": 3.2021, - "step": 39230 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014228393879830984, - "loss": 3.2461, - "step": 39235 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014221396534123582, - "loss": 3.1472, - "step": 39240 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014214400374886293, - "loss": 3.1902, - "step": 39245 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014207405402645197, - "loss": 3.2461, - "step": 39250 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001420041161792627, - "loss": 3.2336, - "step": 39255 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014193419021255428, - "loss": 3.2692, - "step": 39260 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014186427613158468, - "loss": 3.1938, - "step": 39265 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014179437394161107, - "loss": 3.1897, - "step": 39270 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014172448364788972, - "loss": 3.2023, - "step": 39275 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014165460525567602, - "loss": 3.1319, - "step": 39280 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014158473877022454, - "loss": 3.3315, - "step": 39285 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014151488419678887, - "loss": 3.2045, - "step": 39290 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014144504154062166, - "loss": 3.298, - "step": 39295 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014137521080697478, - "loss": 3.2035, - "step": 39300 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001413053920010991, - "loss": 3.2147, - "step": 39305 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014123558512824452, - "loss": 3.2121, - "step": 39310 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001411657901936604, - "loss": 3.1644, - "step": 39315 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014109600720259484, - "loss": 3.2276, - "step": 39320 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014102623616029516, - "loss": 3.2369, - "step": 39325 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014095647707200782, - "loss": 3.0479, - "step": 39330 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001408867299429782, - "loss": 3.174, - "step": 39335 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014081699477845112, - "loss": 3.248, - "step": 39340 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014074727158367028, - "loss": 3.1176, - "step": 39345 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014067756036387842, - "loss": 3.218, - "step": 39350 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014060786112431755, - "loss": 3.2828, - "step": 39355 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001405381738702285, - "loss": 3.1822, - "step": 39360 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001404684986068517, - "loss": 3.2422, - "step": 39365 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014039883533942615, - "loss": 3.2066, - "step": 39370 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001403291840731903, - "loss": 3.2333, - "step": 39375 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001402595448133815, - "loss": 3.3032, - "step": 39380 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014018991756523628, - "loss": 3.2043, - "step": 39385 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014012030233399013, - "loss": 3.1163, - "step": 39390 - }, - { - "epoch": 0.68, - "learning_rate": 0.00014005069912487793, - "loss": 3.1967, - "step": 39395 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001399811079431335, - "loss": 3.3533, - "step": 39400 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013991152879398963, - "loss": 3.1829, - "step": 39405 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013984196168267837, - "loss": 3.245, - "step": 39410 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001397724066144307, - "loss": 3.1686, - "step": 39415 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013970286359447697, - "loss": 3.2045, - "step": 39420 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013963333262804643, - "loss": 3.3767, - "step": 39425 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001395638137203674, - "loss": 3.2496, - "step": 39430 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013949430687666732, - "loss": 3.2372, - "step": 39435 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001394248121021727, - "loss": 3.1356, - "step": 39440 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013935532940210937, - "loss": 3.2368, - "step": 39445 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013928585878170212, - "loss": 3.2078, - "step": 39450 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001392164002461744, - "loss": 3.2284, - "step": 39455 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013914695380074947, - "loss": 3.312, - "step": 39460 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001390775194506493, - "loss": 3.0963, - "step": 39465 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013900809720109478, - "loss": 3.1184, - "step": 39470 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013893868705730657, - "loss": 3.0958, - "step": 39475 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001388692890245034, - "loss": 3.2181, - "step": 39480 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013879990310790405, - "loss": 3.2215, - "step": 39485 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013873052931272583, - "loss": 3.2281, - "step": 39490 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001386611676441852, - "loss": 3.2299, - "step": 39495 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013859181810749817, - "loss": 3.2065, - "step": 39500 - }, - { - "epoch": 0.68, - "eval_loss": 3.226215362548828, - "eval_runtime": 149.8725, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 39500 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013852248070787897, - "loss": 3.1109, - "step": 39505 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013845315545054179, - "loss": 3.1988, - "step": 39510 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013838384234069937, - "loss": 3.2595, - "step": 39515 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013831454138356366, - "loss": 3.1322, - "step": 39520 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013824525258434602, - "loss": 3.2045, - "step": 39525 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001381759759482563, - "loss": 3.2601, - "step": 39530 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013810671148050373, - "loss": 3.249, - "step": 39535 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013803745918629688, - "loss": 3.3018, - "step": 39540 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013796821907084307, - "loss": 3.1712, - "step": 39545 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013789899113934878, - "loss": 3.1496, - "step": 39550 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013782977539701958, - "loss": 3.1739, - "step": 39555 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013776057184906008, - "loss": 3.3045, - "step": 39560 - }, - { - "epoch": 0.68, - "learning_rate": 0.0001376913805006742, - "loss": 3.2423, - "step": 39565 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013762220135706468, - "loss": 3.1416, - "step": 39570 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013755303442343337, - "loss": 3.2441, - "step": 39575 - }, - { - "epoch": 0.68, - "learning_rate": 0.00013748387970498156, - "loss": 3.2493, - "step": 39580 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001374147372069089, - "loss": 3.224, - "step": 39585 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013734560693441484, - "loss": 3.28, - "step": 39590 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013727648889269754, - "loss": 3.2218, - "step": 39595 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013720738308695428, - "loss": 3.1533, - "step": 39600 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013713828952238168, - "loss": 3.2559, - "step": 39605 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013706920820417498, - "loss": 3.1483, - "step": 39610 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001370001391375287, - "loss": 3.2323, - "step": 39615 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013693108232763667, - "loss": 3.1266, - "step": 39620 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013686203777969156, - "loss": 3.1826, - "step": 39625 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013679300549888514, - "loss": 3.2438, - "step": 39630 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013672398549040827, - "loss": 3.1957, - "step": 39635 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013665497775945082, - "loss": 3.2472, - "step": 39640 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013658598231120205, - "loss": 3.0956, - "step": 39645 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013651699915084986, - "loss": 3.2534, - "step": 39650 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013644802828358154, - "loss": 3.1855, - "step": 39655 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001363790697145833, - "loss": 3.2826, - "step": 39660 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013631012344904038, - "loss": 3.176, - "step": 39665 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013624118949213737, - "loss": 3.1286, - "step": 39670 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013617226784905765, - "loss": 3.2014, - "step": 39675 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001361033585249838, - "loss": 3.1897, - "step": 39680 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013603446152509743, - "loss": 3.3175, - "step": 39685 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013596557685457923, - "loss": 3.1729, - "step": 39690 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001358967045186089, - "loss": 3.216, - "step": 39695 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013582784452236546, - "loss": 3.1958, - "step": 39700 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013575899687102674, - "loss": 3.2406, - "step": 39705 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013569016156976977, - "loss": 3.1854, - "step": 39710 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013562133862377054, - "loss": 3.2161, - "step": 39715 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001355525280382041, - "loss": 3.0194, - "step": 39720 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013548372981824492, - "loss": 3.0951, - "step": 39725 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001354149439690662, - "loss": 3.1332, - "step": 39730 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013534617049584002, - "loss": 3.2012, - "step": 39735 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013527740940373807, - "loss": 3.2941, - "step": 39740 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013520866069793077, - "loss": 3.2922, - "step": 39745 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001351399243835875, - "loss": 3.2285, - "step": 39750 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013507120046587728, - "loss": 3.1846, - "step": 39755 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013500248894996727, - "loss": 3.2841, - "step": 39760 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013493378984102462, - "loss": 3.2459, - "step": 39765 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013486510314421503, - "loss": 3.2431, - "step": 39770 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013479642886470326, - "loss": 3.1897, - "step": 39775 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001347277670076536, - "loss": 3.1831, - "step": 39780 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013465911757822865, - "loss": 3.2116, - "step": 39785 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013459048058159085, - "loss": 3.0972, - "step": 39790 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013452185602290113, - "loss": 3.2108, - "step": 39795 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001344532439073197, - "loss": 3.2728, - "step": 39800 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013438464424000602, - "loss": 3.188, - "step": 39805 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001343160570261184, - "loss": 3.1234, - "step": 39810 - }, - { - "epoch": 0.69, - "learning_rate": 0.000134247482270814, - "loss": 3.1929, - "step": 39815 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013417891997924956, - "loss": 3.1941, - "step": 39820 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013411037015658052, - "loss": 3.2134, - "step": 39825 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013404183280796137, - "loss": 3.1629, - "step": 39830 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001339733079385461, - "loss": 3.315, - "step": 39835 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013390479555348698, - "loss": 3.1403, - "step": 39840 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013383629565793612, - "loss": 3.1475, - "step": 39845 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013376780825704422, - "loss": 3.1861, - "step": 39850 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013369933335596112, - "loss": 3.2026, - "step": 39855 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013363087095983608, - "loss": 3.2458, - "step": 39860 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013356242107381673, - "loss": 3.0653, - "step": 39865 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001334939837030504, - "loss": 3.1658, - "step": 39870 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001334255588526832, - "loss": 3.2229, - "step": 39875 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013335714652786018, - "loss": 3.3034, - "step": 39880 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001332887467337259, - "loss": 3.1056, - "step": 39885 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013322035947542344, - "loss": 3.2603, - "step": 39890 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013315198475809505, - "loss": 3.1717, - "step": 39895 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013308362258688245, - "loss": 3.1708, - "step": 39900 - }, - { - "epoch": 0.69, - "learning_rate": 0.000133015272966926, - "loss": 3.0321, - "step": 39905 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013294693590336522, - "loss": 3.1176, - "step": 39910 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013287861140133875, - "loss": 3.1338, - "step": 39915 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001328102994659841, - "loss": 3.1293, - "step": 39920 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001327420001024382, - "loss": 3.1591, - "step": 39925 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013267371331583672, - "loss": 3.2051, - "step": 39930 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013260543911131447, - "loss": 3.2059, - "step": 39935 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013253717749400531, - "loss": 3.1315, - "step": 39940 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013246892846904205, - "loss": 3.2699, - "step": 39945 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001324006920415569, - "loss": 3.2865, - "step": 39950 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013233246821668077, - "loss": 3.1292, - "step": 39955 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001322642569995438, - "loss": 3.237, - "step": 39960 - }, - { - "epoch": 0.69, - "learning_rate": 0.000132196058395275, - "loss": 3.1434, - "step": 39965 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013212787240900265, - "loss": 3.2707, - "step": 39970 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013205969904585385, - "loss": 3.2334, - "step": 39975 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013199153831095511, - "loss": 3.1555, - "step": 39980 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001319233902094317, - "loss": 3.1761, - "step": 39985 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013185525474640792, - "loss": 3.3449, - "step": 39990 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013178713192700726, - "loss": 3.1922, - "step": 39995 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013171902175635212, - "loss": 3.2252, - "step": 40000 - }, - { - "epoch": 0.69, - "eval_loss": 3.221252679824829, - "eval_runtime": 149.7766, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.774, - "step": 40000 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001316509242395642, - "loss": 3.2361, - "step": 40005 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013158283938176403, - "loss": 3.1515, - "step": 40010 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013151476718807118, - "loss": 3.2082, - "step": 40015 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013144670766360438, - "loss": 3.1433, - "step": 40020 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013137866081348122, - "loss": 3.0056, - "step": 40025 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013131062664281876, - "loss": 3.1968, - "step": 40030 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001312426051567326, - "loss": 3.1794, - "step": 40035 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013117459636033767, - "loss": 3.086, - "step": 40040 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013110660025874787, - "loss": 3.2869, - "step": 40045 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013103861685707623, - "loss": 3.2714, - "step": 40050 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001309706461604345, - "loss": 3.3275, - "step": 40055 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013090268817393409, - "loss": 3.1481, - "step": 40060 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013083474290268487, - "loss": 3.1364, - "step": 40065 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013076681035179604, - "loss": 3.2324, - "step": 40070 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001306988905263758, - "loss": 3.2283, - "step": 40075 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013063098343153116, - "loss": 3.2573, - "step": 40080 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001305630890723687, - "loss": 3.1392, - "step": 40085 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013049520745399363, - "loss": 3.3101, - "step": 40090 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013042733858151023, - "loss": 3.1184, - "step": 40095 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013035948246002191, - "loss": 3.2271, - "step": 40100 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013029163909463102, - "loss": 3.2393, - "step": 40105 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013022380849043926, - "loss": 3.2122, - "step": 40110 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013015599065254707, - "loss": 3.2095, - "step": 40115 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013008818558605376, - "loss": 3.2102, - "step": 40120 - }, - { - "epoch": 0.69, - "learning_rate": 0.00013002039329605824, - "loss": 3.0793, - "step": 40125 - }, - { - "epoch": 0.69, - "learning_rate": 0.00012995261378765798, - "loss": 3.1341, - "step": 40130 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001298848470659496, - "loss": 3.2486, - "step": 40135 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001298170931360291, - "loss": 3.1172, - "step": 40140 - }, - { - "epoch": 0.69, - "learning_rate": 0.00012974935200299077, - "loss": 3.0749, - "step": 40145 - }, - { - "epoch": 0.69, - "learning_rate": 0.0001296816236719288, - "loss": 3.1997, - "step": 40150 - }, - { - "epoch": 0.69, - "learning_rate": 0.00012961390814793583, - "loss": 3.1917, - "step": 40155 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012954620543610367, - "loss": 3.2264, - "step": 40160 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012947851554152354, - "loss": 3.2491, - "step": 40165 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012941083846928488, - "loss": 3.1931, - "step": 40170 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012934317422447702, - "loss": 3.218, - "step": 40175 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001292755228121879, - "loss": 3.2307, - "step": 40180 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012920788423750434, - "loss": 3.2845, - "step": 40185 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012914025850551284, - "loss": 3.1866, - "step": 40190 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012907264562129814, - "loss": 3.1706, - "step": 40195 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012900504558994438, - "loss": 3.2382, - "step": 40200 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012893745841653499, - "loss": 3.1685, - "step": 40205 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012886988410615207, - "loss": 3.1907, - "step": 40210 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012880232266387678, - "loss": 3.2388, - "step": 40215 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001287347740947895, - "loss": 3.2157, - "step": 40220 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012866723840396934, - "loss": 3.1247, - "step": 40225 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001285997155964949, - "loss": 3.2234, - "step": 40230 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012853220567744344, - "loss": 3.2356, - "step": 40235 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012846470865189138, - "loss": 3.2794, - "step": 40240 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001283972245249141, - "loss": 3.1414, - "step": 40245 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001283297533015861, - "loss": 3.1587, - "step": 40250 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012826229498698075, - "loss": 3.2113, - "step": 40255 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012819484958617077, - "loss": 3.2561, - "step": 40260 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001281274171042276, - "loss": 3.1379, - "step": 40265 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012805999754622182, - "loss": 3.1382, - "step": 40270 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001279925909172231, - "loss": 3.2634, - "step": 40275 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012792519722229989, - "loss": 3.2751, - "step": 40280 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012785781646652006, - "loss": 3.0939, - "step": 40285 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012779044865495024, - "loss": 3.2182, - "step": 40290 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001277230937926561, - "loss": 3.0571, - "step": 40295 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012765575188470246, - "loss": 3.1138, - "step": 40300 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012758842293615285, - "loss": 3.1653, - "step": 40305 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012752110695207037, - "loss": 3.2168, - "step": 40310 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001274538039375167, - "loss": 3.1544, - "step": 40315 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012738651389755274, - "loss": 3.2413, - "step": 40320 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012731923683723828, - "loss": 3.2262, - "step": 40325 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012725197276163222, - "loss": 3.1795, - "step": 40330 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012718472167579236, - "loss": 3.1318, - "step": 40335 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012711748358477592, - "loss": 3.2327, - "step": 40340 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012705025849363868, - "loss": 3.3024, - "step": 40345 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012698304640743565, - "loss": 3.156, - "step": 40350 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012691584733122083, - "loss": 3.0815, - "step": 40355 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012684866127004715, - "loss": 3.1712, - "step": 40360 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012678148822896685, - "loss": 3.1828, - "step": 40365 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001267143282130309, - "loss": 3.2036, - "step": 40370 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001266471812272894, - "loss": 3.1898, - "step": 40375 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012658004727679145, - "loss": 3.206, - "step": 40380 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001265129263665851, - "loss": 3.2307, - "step": 40385 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012644581850171764, - "loss": 3.1573, - "step": 40390 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012637872368723534, - "loss": 3.1681, - "step": 40395 - }, - { - "epoch": 0.7, - "learning_rate": 0.000126311641928183, - "loss": 3.2237, - "step": 40400 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001262445732296051, - "loss": 3.1012, - "step": 40405 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012617751759654488, - "loss": 3.1711, - "step": 40410 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012611047503404435, - "loss": 3.2486, - "step": 40415 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012604344554714518, - "loss": 3.1923, - "step": 40420 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012597642914088716, - "loss": 3.2736, - "step": 40425 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012590942582030992, - "loss": 3.2386, - "step": 40430 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012584243559045168, - "loss": 3.1725, - "step": 40435 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001257754584563496, - "loss": 3.1664, - "step": 40440 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001257084944230404, - "loss": 3.1511, - "step": 40445 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012564154349555893, - "loss": 3.0612, - "step": 40450 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012557460567893999, - "loss": 3.3118, - "step": 40455 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012550768097821673, - "loss": 3.0661, - "step": 40460 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001254407693984215, - "loss": 3.2882, - "step": 40465 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012537387094458603, - "loss": 3.1967, - "step": 40470 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012530698562174044, - "loss": 3.1915, - "step": 40475 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012524011343491412, - "loss": 3.1552, - "step": 40480 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012517325438913572, - "loss": 3.0782, - "step": 40485 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012510640848943265, - "loss": 3.1107, - "step": 40490 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012503957574083133, - "loss": 3.1968, - "step": 40495 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001249727561483573, - "loss": 3.1461, - "step": 40500 - }, - { - "epoch": 0.7, - "eval_loss": 3.2169432640075684, - "eval_runtime": 149.8523, - "eval_samples_per_second": 12.285, - "eval_steps_per_second": 0.774, - "step": 40500 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012490594971703483, - "loss": 2.9939, - "step": 40505 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012483915645188776, - "loss": 3.0932, - "step": 40510 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012477237635793843, - "loss": 3.1378, - "step": 40515 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012470560944020825, - "loss": 3.1682, - "step": 40520 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012463885570371808, - "loss": 3.0671, - "step": 40525 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012457211515348705, - "loss": 3.0951, - "step": 40530 - }, - { - "epoch": 0.7, - "learning_rate": 0.000124505387794534, - "loss": 3.2597, - "step": 40535 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001244386736318764, - "loss": 3.1131, - "step": 40540 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001243719726705307, - "loss": 3.0366, - "step": 40545 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012430528491551277, - "loss": 3.148, - "step": 40550 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012423861037183693, - "loss": 3.1749, - "step": 40555 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012417194904451668, - "loss": 3.1621, - "step": 40560 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012410530093856487, - "loss": 3.1866, - "step": 40565 - }, - { - "epoch": 0.7, - "learning_rate": 0.000124038666058993, - "loss": 3.2424, - "step": 40570 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012397204441081164, - "loss": 3.3355, - "step": 40575 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012390543599903042, - "loss": 3.1592, - "step": 40580 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012383884082865781, - "loss": 3.105, - "step": 40585 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001237722589047017, - "loss": 3.2395, - "step": 40590 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012370569023216856, - "loss": 3.1991, - "step": 40595 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012363913481606403, - "loss": 3.1987, - "step": 40600 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012357259266139272, - "loss": 3.0725, - "step": 40605 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012350606377315815, - "loss": 3.2065, - "step": 40610 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012343954815636314, - "loss": 3.2754, - "step": 40615 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001233730458160093, - "loss": 3.1027, - "step": 40620 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001233065567570972, - "loss": 3.244, - "step": 40625 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012324008098462652, - "loss": 3.1625, - "step": 40630 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012317361850359583, - "loss": 3.1442, - "step": 40635 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001231071693190027, - "loss": 3.2685, - "step": 40640 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012304073343584403, - "loss": 3.1912, - "step": 40645 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012297431085911526, - "loss": 3.1647, - "step": 40650 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012290790159381106, - "loss": 3.1712, - "step": 40655 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012284150564492513, - "loss": 3.3168, - "step": 40660 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001227751230174499, - "loss": 3.2013, - "step": 40665 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001227087537163773, - "loss": 3.2584, - "step": 40670 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012264239774669776, - "loss": 3.2529, - "step": 40675 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012257605511340104, - "loss": 3.1225, - "step": 40680 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012250972582147562, - "loss": 3.0349, - "step": 40685 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012244340987590914, - "loss": 3.1949, - "step": 40690 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012237710728168835, - "loss": 3.2146, - "step": 40695 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012231081804379892, - "loss": 3.2274, - "step": 40700 - }, - { - "epoch": 0.7, - "learning_rate": 0.0001222445421672251, - "loss": 3.1047, - "step": 40705 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012217827965695083, - "loss": 3.276, - "step": 40710 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012211203051795864, - "loss": 3.1447, - "step": 40715 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012204579475522996, - "loss": 3.1452, - "step": 40720 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012197957237374574, - "loss": 3.187, - "step": 40725 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012191336337848511, - "loss": 3.1845, - "step": 40730 - }, - { - "epoch": 0.7, - "learning_rate": 0.00012184716777442699, - "loss": 3.1049, - "step": 40735 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012178098556654884, - "loss": 3.1475, - "step": 40740 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012171481675982707, - "loss": 3.0952, - "step": 40745 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012164866135923754, - "loss": 3.204, - "step": 40750 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001215825193697546, - "loss": 3.3169, - "step": 40755 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012151639079635188, - "loss": 3.324, - "step": 40760 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012145027564400183, - "loss": 3.1329, - "step": 40765 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012138417391767586, - "loss": 3.2409, - "step": 40770 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012131808562234478, - "loss": 3.1679, - "step": 40775 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012125201076297802, - "loss": 3.2133, - "step": 40780 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012118594934454375, - "loss": 3.2173, - "step": 40785 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001211199013720098, - "loss": 2.9781, - "step": 40790 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001210538668503425, - "loss": 3.2979, - "step": 40795 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012098784578450726, - "loss": 3.1413, - "step": 40800 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001209218381794688, - "loss": 3.1493, - "step": 40805 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012085584404019011, - "loss": 3.1366, - "step": 40810 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012078986337163399, - "loss": 3.1048, - "step": 40815 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012072389617876167, - "loss": 3.1932, - "step": 40820 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012065794246653348, - "loss": 3.1397, - "step": 40825 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012059200223990913, - "loss": 3.1325, - "step": 40830 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012052607550384668, - "loss": 3.1377, - "step": 40835 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012046016226330344, - "loss": 3.1563, - "step": 40840 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012039426252323596, - "loss": 3.246, - "step": 40845 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012032837628859946, - "loss": 3.0618, - "step": 40850 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001202625035643483, - "loss": 3.1616, - "step": 40855 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012019664435543569, - "loss": 3.1188, - "step": 40860 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012013079866681387, - "loss": 3.2018, - "step": 40865 - }, - { - "epoch": 0.71, - "learning_rate": 0.00012006496650343427, - "loss": 3.1535, - "step": 40870 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011999914787024704, - "loss": 3.1538, - "step": 40875 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011993334277220142, - "loss": 3.2631, - "step": 40880 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011986755121424558, - "loss": 3.1214, - "step": 40885 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011980177320132663, - "loss": 3.1057, - "step": 40890 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001197360087383909, - "loss": 3.1804, - "step": 40895 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001196702578303835, - "loss": 3.0932, - "step": 40900 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011960452048224853, - "loss": 3.1851, - "step": 40905 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011953879669892913, - "loss": 3.1617, - "step": 40910 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011947308648536734, - "loss": 3.1573, - "step": 40915 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011940738984650419, - "loss": 3.1449, - "step": 40920 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011934170678727987, - "loss": 3.1135, - "step": 40925 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011927603731263337, - "loss": 3.1522, - "step": 40930 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011921038142750264, - "loss": 3.1436, - "step": 40935 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001191447391368247, - "loss": 3.1961, - "step": 40940 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011907911044553541, - "loss": 3.2341, - "step": 40945 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011901349535856993, - "loss": 3.3183, - "step": 40950 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011894789388086205, - "loss": 3.1467, - "step": 40955 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011888230601734466, - "loss": 3.099, - "step": 40960 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011881673177294967, - "loss": 3.1508, - "step": 40965 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011875117115260775, - "loss": 3.2019, - "step": 40970 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011868562416124903, - "loss": 3.2991, - "step": 40975 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011862009080380213, - "loss": 3.0779, - "step": 40980 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011855457108519482, - "loss": 3.1401, - "step": 40985 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001184890650103539, - "loss": 3.1752, - "step": 40990 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011842357258420507, - "loss": 3.241, - "step": 40995 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011835809381167289, - "loss": 3.2169, - "step": 41000 - }, - { - "epoch": 0.71, - "eval_loss": 3.2123522758483887, - "eval_runtime": 149.7773, - "eval_samples_per_second": 12.292, - "eval_steps_per_second": 0.774, - "step": 41000 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011829262869768125, - "loss": 3.1618, - "step": 41005 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011822717724715272, - "loss": 3.1831, - "step": 41010 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001181617394650089, - "loss": 3.1159, - "step": 41015 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011809631535617035, - "loss": 3.2193, - "step": 41020 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011803090492555653, - "loss": 3.2243, - "step": 41025 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001179655081780862, - "loss": 3.1949, - "step": 41030 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011790012511867675, - "loss": 3.2647, - "step": 41035 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011783475575224461, - "loss": 3.1304, - "step": 41040 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001177694000837053, - "loss": 3.045, - "step": 41045 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011770405811797305, - "loss": 3.1695, - "step": 41050 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011763872985996152, - "loss": 3.1831, - "step": 41055 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011757341531458297, - "loss": 3.0754, - "step": 41060 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001175081144867485, - "loss": 3.0875, - "step": 41065 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011744282738136864, - "loss": 3.1625, - "step": 41070 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011737755400335259, - "loss": 3.2232, - "step": 41075 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011731229435760843, - "loss": 3.2507, - "step": 41080 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011724704844904368, - "loss": 3.1869, - "step": 41085 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011718181628256413, - "loss": 3.2847, - "step": 41090 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011711659786307514, - "loss": 3.199, - "step": 41095 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011705139319548073, - "loss": 3.1731, - "step": 41100 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011698620228468384, - "loss": 3.0682, - "step": 41105 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011692102513558682, - "loss": 3.1529, - "step": 41110 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011685586175309026, - "loss": 3.31, - "step": 41115 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011679071214209438, - "loss": 3.1318, - "step": 41120 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011672557630749801, - "loss": 3.2206, - "step": 41125 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011666045425419895, - "loss": 3.1701, - "step": 41130 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011659534598709431, - "loss": 3.1972, - "step": 41135 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011653025151107965, - "loss": 3.1705, - "step": 41140 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001164651708310497, - "loss": 3.1075, - "step": 41145 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011640010395189838, - "loss": 3.2107, - "step": 41150 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001163350508785183, - "loss": 3.1441, - "step": 41155 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011627001161580116, - "loss": 3.2071, - "step": 41160 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011620498616863753, - "loss": 3.0613, - "step": 41165 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011613997454191691, - "loss": 3.2416, - "step": 41170 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011607497674052802, - "loss": 3.1232, - "step": 41175 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011600999276935832, - "loss": 3.1565, - "step": 41180 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011594502263329418, - "loss": 3.1478, - "step": 41185 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001158800663372211, - "loss": 3.0881, - "step": 41190 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011581512388602334, - "loss": 3.174, - "step": 41195 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001157501952845844, - "loss": 3.0763, - "step": 41200 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011568528053778651, - "loss": 3.153, - "step": 41205 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011562037965051084, - "loss": 3.2635, - "step": 41210 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001155554926276379, - "loss": 3.2358, - "step": 41215 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011549061947404656, - "loss": 3.167, - "step": 41220 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011542576019461489, - "loss": 3.144, - "step": 41225 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011536091479422028, - "loss": 3.1572, - "step": 41230 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011529608327773859, - "loss": 3.128, - "step": 41235 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011523126565004486, - "loss": 3.2118, - "step": 41240 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011516646191601301, - "loss": 3.273, - "step": 41245 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011510167208051588, - "loss": 3.1155, - "step": 41250 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011503689614842551, - "loss": 3.2507, - "step": 41255 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011497213412461262, - "loss": 3.231, - "step": 41260 - }, - { - "epoch": 0.71, - "learning_rate": 0.000114907386013947, - "loss": 3.1433, - "step": 41265 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011484265182129739, - "loss": 3.1993, - "step": 41270 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011477793155153134, - "loss": 3.3162, - "step": 41275 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011471322520951567, - "loss": 3.2125, - "step": 41280 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001146485328001159, - "loss": 3.1264, - "step": 41285 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011458385432819654, - "loss": 3.2733, - "step": 41290 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011451918979862108, - "loss": 3.2649, - "step": 41295 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011445453921625198, - "loss": 3.2322, - "step": 41300 - }, - { - "epoch": 0.71, - "learning_rate": 0.00011438990258595055, - "loss": 3.1406, - "step": 41305 - }, - { - "epoch": 0.71, - "learning_rate": 0.0001143252799125773, - "loss": 3.1362, - "step": 41310 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011426067120099141, - "loss": 3.1726, - "step": 41315 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011419607645605117, - "loss": 3.1266, - "step": 41320 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011413149568261374, - "loss": 3.1741, - "step": 41325 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011406692888553518, - "loss": 3.1985, - "step": 41330 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011400237606967078, - "loss": 3.0828, - "step": 41335 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011393783723987457, - "loss": 3.2476, - "step": 41340 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011387331240099926, - "loss": 3.2025, - "step": 41345 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011380880155789706, - "loss": 3.3122, - "step": 41350 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011374430471541874, - "loss": 3.1677, - "step": 41355 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011367982187841411, - "loss": 2.9866, - "step": 41360 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011361535305173218, - "loss": 3.1992, - "step": 41365 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011355089824022028, - "loss": 3.1555, - "step": 41370 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011348645744872542, - "loss": 3.2704, - "step": 41375 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011342203068209307, - "loss": 3.1413, - "step": 41380 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011335761794516771, - "loss": 3.1366, - "step": 41385 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011329321924279318, - "loss": 3.1669, - "step": 41390 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011322883457981149, - "loss": 3.1667, - "step": 41395 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011316446396106438, - "loss": 3.1504, - "step": 41400 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011310010739139204, - "loss": 3.2718, - "step": 41405 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011303576487563366, - "loss": 3.1071, - "step": 41410 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011297143641862781, - "loss": 3.2227, - "step": 41415 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011290712202521133, - "loss": 3.2243, - "step": 41420 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011284282170022033, - "loss": 3.185, - "step": 41425 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001127785354484901, - "loss": 3.1816, - "step": 41430 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011271426327485448, - "loss": 3.204, - "step": 41435 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011265000518414633, - "loss": 3.1829, - "step": 41440 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011258576118119786, - "loss": 3.0784, - "step": 41445 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011252153127083944, - "loss": 3.0765, - "step": 41450 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011245731545790121, - "loss": 3.0026, - "step": 41455 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011239311374721167, - "loss": 3.1984, - "step": 41460 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001123289261435984, - "loss": 3.1799, - "step": 41465 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011226475265188832, - "loss": 3.2619, - "step": 41470 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011220059327690649, - "loss": 3.1734, - "step": 41475 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011213644802347769, - "loss": 3.1921, - "step": 41480 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011207231689642522, - "loss": 3.1058, - "step": 41485 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011200819990057129, - "loss": 3.0331, - "step": 41490 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011194409704073752, - "loss": 3.0883, - "step": 41495 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011188000832174376, - "loss": 3.1265, - "step": 41500 - }, - { - "epoch": 0.72, - "eval_loss": 3.207731008529663, - "eval_runtime": 149.6797, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 41500 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011181593374840916, - "loss": 3.0709, - "step": 41505 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011175187332555206, - "loss": 3.2107, - "step": 41510 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011168782705798929, - "loss": 3.1451, - "step": 41515 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011162379495053683, - "loss": 3.118, - "step": 41520 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011155977700800959, - "loss": 3.0218, - "step": 41525 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011149577323522128, - "loss": 3.0796, - "step": 41530 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011143178363698489, - "loss": 3.2099, - "step": 41535 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011136780821811194, - "loss": 3.0831, - "step": 41540 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011130384698341308, - "loss": 3.2227, - "step": 41545 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011123989993769789, - "loss": 3.151, - "step": 41550 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011117596708577473, - "loss": 3.1856, - "step": 41555 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011111204843245124, - "loss": 3.086, - "step": 41560 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011104814398253367, - "loss": 3.2135, - "step": 41565 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011098425374082733, - "loss": 3.1735, - "step": 41570 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001109203777121364, - "loss": 3.2179, - "step": 41575 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011085651590126405, - "loss": 3.2251, - "step": 41580 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011079266831301225, - "loss": 3.1628, - "step": 41585 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011072883495218224, - "loss": 3.2514, - "step": 41590 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011066501582357384, - "loss": 3.2295, - "step": 41595 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011060121093198591, - "loss": 3.2016, - "step": 41600 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011053742028221629, - "loss": 3.1608, - "step": 41605 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011047364387906157, - "loss": 3.1431, - "step": 41610 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001104098817273176, - "loss": 3.1741, - "step": 41615 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011034613383177893, - "loss": 3.2561, - "step": 41620 - }, - { - "epoch": 0.72, - "learning_rate": 0.000110282400197239, - "loss": 3.1301, - "step": 41625 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011021868082849031, - "loss": 3.1035, - "step": 41630 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011015497573032411, - "loss": 3.2929, - "step": 41635 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011009128490753086, - "loss": 3.233, - "step": 41640 - }, - { - "epoch": 0.72, - "learning_rate": 0.00011002760836489986, - "loss": 3.0038, - "step": 41645 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010996394610721889, - "loss": 3.2885, - "step": 41650 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010990029813927538, - "loss": 3.1889, - "step": 41655 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010983666446585519, - "loss": 3.2143, - "step": 41660 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010977304509174315, - "loss": 3.1514, - "step": 41665 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010970944002172335, - "loss": 3.1891, - "step": 41670 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010964584926057843, - "loss": 3.1459, - "step": 41675 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010958227281309008, - "loss": 3.069, - "step": 41680 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010951871068403893, - "loss": 3.3585, - "step": 41685 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010945516287820446, - "loss": 3.2503, - "step": 41690 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001093916294003653, - "loss": 3.1437, - "step": 41695 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010932811025529875, - "loss": 3.0947, - "step": 41700 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010926460544778115, - "loss": 3.2289, - "step": 41705 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010920111498258771, - "loss": 3.1003, - "step": 41710 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010913763886449249, - "loss": 3.1294, - "step": 41715 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010907417709826878, - "loss": 3.126, - "step": 41720 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010901072968868858, - "loss": 3.0671, - "step": 41725 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010894729664052247, - "loss": 3.2908, - "step": 41730 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010888387795854067, - "loss": 3.1179, - "step": 41735 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010882047364751177, - "loss": 3.1891, - "step": 41740 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010875708371220335, - "loss": 3.193, - "step": 41745 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010869370815738237, - "loss": 3.2572, - "step": 41750 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010863034698781387, - "loss": 3.1651, - "step": 41755 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010856700020826264, - "loss": 3.1652, - "step": 41760 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010850366782349193, - "loss": 3.1472, - "step": 41765 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010844034983826389, - "loss": 3.2266, - "step": 41770 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010837704625734002, - "loss": 3.2599, - "step": 41775 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010831375708547998, - "loss": 3.2315, - "step": 41780 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010825048232744318, - "loss": 3.2559, - "step": 41785 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010818722198798737, - "loss": 3.1243, - "step": 41790 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010812397607186936, - "loss": 3.1431, - "step": 41795 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010806074458384519, - "loss": 3.2221, - "step": 41800 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010799752752866928, - "loss": 3.2885, - "step": 41805 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010793432491109518, - "loss": 3.2099, - "step": 41810 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010787113673587563, - "loss": 3.2519, - "step": 41815 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010780796300776196, - "loss": 3.1942, - "step": 41820 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010774480373150449, - "loss": 3.0997, - "step": 41825 - }, - { - "epoch": 0.72, - "learning_rate": 0.0001076816589118525, - "loss": 3.1281, - "step": 41830 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010761852855355405, - "loss": 3.1984, - "step": 41835 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010755541266135639, - "loss": 3.1747, - "step": 41840 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010749231124000543, - "loss": 3.116, - "step": 41845 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010742922429424612, - "loss": 3.093, - "step": 41850 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010736615182882218, - "loss": 3.2103, - "step": 41855 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010730309384847643, - "loss": 3.2, - "step": 41860 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010724005035795035, - "loss": 3.0806, - "step": 41865 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010717702136198471, - "loss": 3.2047, - "step": 41870 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010711400686531885, - "loss": 3.1632, - "step": 41875 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010705100687269118, - "loss": 3.0442, - "step": 41880 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010698802138883891, - "loss": 3.2539, - "step": 41885 - }, - { - "epoch": 0.72, - "learning_rate": 0.00010692505041849818, - "loss": 3.1052, - "step": 41890 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010686209396640425, - "loss": 3.2667, - "step": 41895 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010679915203729107, - "loss": 3.2379, - "step": 41900 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010673622463589149, - "loss": 3.2051, - "step": 41905 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010667331176693735, - "loss": 3.1852, - "step": 41910 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010661041343515931, - "loss": 3.1764, - "step": 41915 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010654752964528715, - "loss": 3.191, - "step": 41920 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010648466040204932, - "loss": 3.2177, - "step": 41925 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010642180571017332, - "loss": 3.176, - "step": 41930 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010635896557438544, - "loss": 3.1663, - "step": 41935 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010629613999941095, - "loss": 3.1828, - "step": 41940 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010623332898997389, - "loss": 3.246, - "step": 41945 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010617053255079758, - "loss": 3.1608, - "step": 41950 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010610775068660385, - "loss": 3.1494, - "step": 41955 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010604498340211361, - "loss": 3.173, - "step": 41960 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001059822307020466, - "loss": 3.2291, - "step": 41965 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001059194925911214, - "loss": 3.1392, - "step": 41970 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010585676907405583, - "loss": 3.1625, - "step": 41975 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010579406015556625, - "loss": 3.271, - "step": 41980 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010573136584036807, - "loss": 3.1903, - "step": 41985 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010566868613317558, - "loss": 3.1708, - "step": 41990 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001056060210387018, - "loss": 3.1984, - "step": 41995 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010554337056165913, - "loss": 3.1468, - "step": 42000 - }, - { - "epoch": 0.73, - "eval_loss": 3.204759359359741, - "eval_runtime": 149.9776, - "eval_samples_per_second": 12.275, - "eval_steps_per_second": 0.773, - "step": 42000 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010548073470675853, - "loss": 3.0838, - "step": 42005 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010541811347870956, - "loss": 3.1097, - "step": 42010 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010535550688222135, - "loss": 3.0943, - "step": 42015 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010529291492200146, - "loss": 3.212, - "step": 42020 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010523033760275644, - "loss": 3.1342, - "step": 42025 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010516777492919206, - "loss": 3.1234, - "step": 42030 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010510522690601228, - "loss": 3.09, - "step": 42035 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010504269353792073, - "loss": 3.1094, - "step": 42040 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010498017482961946, - "loss": 3.1887, - "step": 42045 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010491767078580953, - "loss": 3.1215, - "step": 42050 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010485518141119113, - "loss": 3.1981, - "step": 42055 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010479270671046284, - "loss": 3.1392, - "step": 42060 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010473024668832265, - "loss": 3.1256, - "step": 42065 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010466780134946715, - "loss": 3.1203, - "step": 42070 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010460537069859185, - "loss": 3.0632, - "step": 42075 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001045429547403915, - "loss": 3.1372, - "step": 42080 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010448055347955913, - "loss": 3.2098, - "step": 42085 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010441816692078702, - "loss": 3.1605, - "step": 42090 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001043557950687665, - "loss": 3.1536, - "step": 42095 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001042934379281876, - "loss": 3.2327, - "step": 42100 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010423109550373913, - "loss": 3.122, - "step": 42105 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010416876780010898, - "loss": 3.1947, - "step": 42110 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001041064548219838, - "loss": 3.2346, - "step": 42115 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010404415657404935, - "loss": 3.2128, - "step": 42120 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010398187306099007, - "loss": 3.2819, - "step": 42125 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010391960428748938, - "loss": 3.1558, - "step": 42130 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010385735025822954, - "loss": 3.1389, - "step": 42135 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010379511097789162, - "loss": 3.2378, - "step": 42140 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010373288645115594, - "loss": 3.1614, - "step": 42145 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010367067668270137, - "loss": 3.106, - "step": 42150 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010360848167720561, - "loss": 3.1168, - "step": 42155 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010354630143934578, - "loss": 3.2302, - "step": 42160 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010348413597379714, - "loss": 3.1316, - "step": 42165 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010342198528523423, - "loss": 3.2556, - "step": 42170 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010335984937833069, - "loss": 3.1538, - "step": 42175 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010329772825775871, - "loss": 3.0533, - "step": 42180 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001032356219281895, - "loss": 3.0731, - "step": 42185 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010317353039429312, - "loss": 3.1566, - "step": 42190 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010311145366073845, - "loss": 3.0046, - "step": 42195 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010304939173219347, - "loss": 3.1053, - "step": 42200 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010298734461332493, - "loss": 3.1393, - "step": 42205 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010292531230879836, - "loss": 3.1764, - "step": 42210 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010286329482327833, - "loss": 3.2309, - "step": 42215 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010280129216142815, - "loss": 3.2166, - "step": 42220 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010273930432791024, - "loss": 3.0617, - "step": 42225 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010267733132738571, - "loss": 3.2347, - "step": 42230 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010261537316451457, - "loss": 3.1509, - "step": 42235 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010255342984395583, - "loss": 3.1592, - "step": 42240 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010249150137036725, - "loss": 3.2813, - "step": 42245 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010242958774840544, - "loss": 3.0992, - "step": 42250 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010236768898272622, - "loss": 3.1728, - "step": 42255 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010230580507798392, - "loss": 3.1918, - "step": 42260 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010224393603883195, - "loss": 3.1306, - "step": 42265 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010218208186992248, - "loss": 3.1408, - "step": 42270 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010212024257590652, - "loss": 3.1967, - "step": 42275 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010205841816143437, - "loss": 3.2242, - "step": 42280 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001019966086311547, - "loss": 3.2118, - "step": 42285 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010193481398971532, - "loss": 3.195, - "step": 42290 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010187303424176285, - "loss": 3.2024, - "step": 42295 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010181126939194269, - "loss": 3.2437, - "step": 42300 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010174951944489953, - "loss": 3.1947, - "step": 42305 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010168778440527656, - "loss": 3.2454, - "step": 42310 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010162606427771568, - "loss": 3.2738, - "step": 42315 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001015643590668582, - "loss": 3.1613, - "step": 42320 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010150266877734395, - "loss": 3.117, - "step": 42325 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010144099341381165, - "loss": 3.143, - "step": 42330 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010137933298089923, - "loss": 3.2231, - "step": 42335 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010131768748324284, - "loss": 3.1593, - "step": 42340 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010125605692547824, - "loss": 3.1333, - "step": 42345 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010119444131223962, - "loss": 3.1118, - "step": 42350 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010113284064816004, - "loss": 3.1835, - "step": 42355 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010107125493787187, - "loss": 3.2863, - "step": 42360 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010100968418600574, - "loss": 3.1489, - "step": 42365 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010094812839719143, - "loss": 3.1761, - "step": 42370 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010088658757605786, - "loss": 3.0487, - "step": 42375 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010082506172723244, - "loss": 3.0552, - "step": 42380 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010076355085534155, - "loss": 3.1523, - "step": 42385 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010070205496501075, - "loss": 3.1273, - "step": 42390 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010064057406086381, - "loss": 3.1326, - "step": 42395 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010057910814752414, - "loss": 3.0923, - "step": 42400 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010051765722961349, - "loss": 3.09, - "step": 42405 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010045622131175262, - "loss": 3.2578, - "step": 42410 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010039480039856145, - "loss": 3.0941, - "step": 42415 - }, - { - "epoch": 0.73, - "learning_rate": 0.0001003333944946581, - "loss": 3.2674, - "step": 42420 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010027200360466032, - "loss": 3.1428, - "step": 42425 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010021062773318431, - "loss": 3.2387, - "step": 42430 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010014926688484507, - "loss": 3.0526, - "step": 42435 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010008792106425693, - "loss": 3.151, - "step": 42440 - }, - { - "epoch": 0.73, - "learning_rate": 0.00010002659027603253, - "loss": 3.113, - "step": 42445 - }, - { - "epoch": 0.73, - "learning_rate": 9.996527452478354e-05, - "loss": 3.2201, - "step": 42450 - }, - { - "epoch": 0.73, - "learning_rate": 9.990397381512088e-05, - "loss": 3.1862, - "step": 42455 - }, - { - "epoch": 0.73, - "learning_rate": 9.984268815165389e-05, - "loss": 3.2709, - "step": 42460 - }, - { - "epoch": 0.73, - "learning_rate": 9.978141753899098e-05, - "loss": 3.2524, - "step": 42465 - }, - { - "epoch": 0.73, - "learning_rate": 9.972016198173938e-05, - "loss": 3.12, - "step": 42470 - }, - { - "epoch": 0.74, - "learning_rate": 9.965892148450506e-05, - "loss": 3.2258, - "step": 42475 - }, - { - "epoch": 0.74, - "learning_rate": 9.959769605189322e-05, - "loss": 3.163, - "step": 42480 - }, - { - "epoch": 0.74, - "learning_rate": 9.953648568850758e-05, - "loss": 3.2217, - "step": 42485 - }, - { - "epoch": 0.74, - "learning_rate": 9.947529039895088e-05, - "loss": 3.1551, - "step": 42490 - }, - { - "epoch": 0.74, - "learning_rate": 9.941411018782465e-05, - "loss": 3.0359, - "step": 42495 - }, - { - "epoch": 0.74, - "learning_rate": 9.935294505972927e-05, - "loss": 3.2666, - "step": 42500 - }, - { - "epoch": 0.74, - "eval_loss": 3.201066255569458, - "eval_runtime": 149.9729, - "eval_samples_per_second": 12.276, - "eval_steps_per_second": 0.773, - "step": 42500 - }, - { - "epoch": 0.74, - "learning_rate": 9.929179501926418e-05, - "loss": 3.1589, - "step": 42505 - }, - { - "epoch": 0.74, - "learning_rate": 9.923066007102752e-05, - "loss": 3.2029, - "step": 42510 - }, - { - "epoch": 0.74, - "learning_rate": 9.91695402196163e-05, - "loss": 3.2643, - "step": 42515 - }, - { - "epoch": 0.74, - "learning_rate": 9.910843546962638e-05, - "loss": 3.1305, - "step": 42520 - }, - { - "epoch": 0.74, - "learning_rate": 9.904734582565261e-05, - "loss": 3.1963, - "step": 42525 - }, - { - "epoch": 0.74, - "learning_rate": 9.898627129228842e-05, - "loss": 3.2523, - "step": 42530 - }, - { - "epoch": 0.74, - "learning_rate": 9.89252118741266e-05, - "loss": 3.2724, - "step": 42535 - }, - { - "epoch": 0.74, - "learning_rate": 9.886416757575832e-05, - "loss": 3.1106, - "step": 42540 - }, - { - "epoch": 0.74, - "learning_rate": 9.880313840177383e-05, - "loss": 3.0961, - "step": 42545 - }, - { - "epoch": 0.74, - "learning_rate": 9.874212435676224e-05, - "loss": 3.1022, - "step": 42550 - }, - { - "epoch": 0.74, - "learning_rate": 9.868112544531136e-05, - "loss": 3.2171, - "step": 42555 - }, - { - "epoch": 0.74, - "learning_rate": 9.862014167200818e-05, - "loss": 3.1618, - "step": 42560 - }, - { - "epoch": 0.74, - "learning_rate": 9.855917304143833e-05, - "loss": 3.1864, - "step": 42565 - }, - { - "epoch": 0.74, - "learning_rate": 9.849821955818625e-05, - "loss": 3.1131, - "step": 42570 - }, - { - "epoch": 0.74, - "learning_rate": 9.843728122683545e-05, - "loss": 3.276, - "step": 42575 - }, - { - "epoch": 0.74, - "learning_rate": 9.837635805196792e-05, - "loss": 3.1902, - "step": 42580 - }, - { - "epoch": 0.74, - "learning_rate": 9.831545003816512e-05, - "loss": 3.0883, - "step": 42585 - }, - { - "epoch": 0.74, - "learning_rate": 9.825455719000695e-05, - "loss": 3.1129, - "step": 42590 - }, - { - "epoch": 0.74, - "learning_rate": 9.819367951207196e-05, - "loss": 3.077, - "step": 42595 - }, - { - "epoch": 0.74, - "learning_rate": 9.813281700893813e-05, - "loss": 3.1294, - "step": 42600 - }, - { - "epoch": 0.74, - "learning_rate": 9.807196968518187e-05, - "loss": 3.2189, - "step": 42605 - }, - { - "epoch": 0.74, - "learning_rate": 9.801113754537855e-05, - "loss": 3.2169, - "step": 42610 - }, - { - "epoch": 0.74, - "learning_rate": 9.795032059410263e-05, - "loss": 3.2404, - "step": 42615 - }, - { - "epoch": 0.74, - "learning_rate": 9.788951883592709e-05, - "loss": 3.1659, - "step": 42620 - }, - { - "epoch": 0.74, - "learning_rate": 9.782873227542393e-05, - "loss": 3.0816, - "step": 42625 - }, - { - "epoch": 0.74, - "learning_rate": 9.7767960917164e-05, - "loss": 3.1314, - "step": 42630 - }, - { - "epoch": 0.74, - "learning_rate": 9.770720476571688e-05, - "loss": 3.2118, - "step": 42635 - }, - { - "epoch": 0.74, - "learning_rate": 9.764646382565133e-05, - "loss": 3.1068, - "step": 42640 - }, - { - "epoch": 0.74, - "learning_rate": 9.758573810153467e-05, - "loss": 3.1281, - "step": 42645 - }, - { - "epoch": 0.74, - "learning_rate": 9.752502759793312e-05, - "loss": 3.1041, - "step": 42650 - }, - { - "epoch": 0.74, - "learning_rate": 9.746433231941186e-05, - "loss": 3.3104, - "step": 42655 - }, - { - "epoch": 0.74, - "learning_rate": 9.740365227053469e-05, - "loss": 3.2378, - "step": 42660 - }, - { - "epoch": 0.74, - "learning_rate": 9.734298745586472e-05, - "loss": 3.2226, - "step": 42665 - }, - { - "epoch": 0.74, - "learning_rate": 9.728233787996356e-05, - "loss": 3.1264, - "step": 42670 - }, - { - "epoch": 0.74, - "learning_rate": 9.72217035473915e-05, - "loss": 3.165, - "step": 42675 - }, - { - "epoch": 0.74, - "learning_rate": 9.716108446270822e-05, - "loss": 3.1995, - "step": 42680 - }, - { - "epoch": 0.74, - "learning_rate": 9.710048063047184e-05, - "loss": 3.187, - "step": 42685 - }, - { - "epoch": 0.74, - "learning_rate": 9.703989205523936e-05, - "loss": 3.1576, - "step": 42690 - }, - { - "epoch": 0.74, - "learning_rate": 9.697931874156707e-05, - "loss": 3.2503, - "step": 42695 - }, - { - "epoch": 0.74, - "learning_rate": 9.691876069400931e-05, - "loss": 3.1001, - "step": 42700 - }, - { - "epoch": 0.74, - "learning_rate": 9.685821791712011e-05, - "loss": 3.2139, - "step": 42705 - }, - { - "epoch": 0.74, - "learning_rate": 9.679769041545181e-05, - "loss": 3.0835, - "step": 42710 - }, - { - "epoch": 0.74, - "learning_rate": 9.673717819355571e-05, - "loss": 3.2361, - "step": 42715 - }, - { - "epoch": 0.74, - "learning_rate": 9.66766812559823e-05, - "loss": 3.1695, - "step": 42720 - }, - { - "epoch": 0.74, - "learning_rate": 9.661619960728026e-05, - "loss": 3.1172, - "step": 42725 - }, - { - "epoch": 0.74, - "learning_rate": 9.655573325199778e-05, - "loss": 3.2464, - "step": 42730 - }, - { - "epoch": 0.74, - "learning_rate": 9.649528219468151e-05, - "loss": 3.22, - "step": 42735 - }, - { - "epoch": 0.74, - "learning_rate": 9.643484643987698e-05, - "loss": 3.144, - "step": 42740 - }, - { - "epoch": 0.74, - "learning_rate": 9.637442599212894e-05, - "loss": 3.16, - "step": 42745 - }, - { - "epoch": 0.74, - "learning_rate": 9.631402085598038e-05, - "loss": 3.1275, - "step": 42750 - }, - { - "epoch": 0.74, - "learning_rate": 9.625363103597352e-05, - "loss": 3.2621, - "step": 42755 - }, - { - "epoch": 0.74, - "learning_rate": 9.61932565366495e-05, - "loss": 3.1393, - "step": 42760 - }, - { - "epoch": 0.74, - "learning_rate": 9.613289736254807e-05, - "loss": 3.1324, - "step": 42765 - }, - { - "epoch": 0.74, - "learning_rate": 9.607255351820802e-05, - "loss": 2.9853, - "step": 42770 - }, - { - "epoch": 0.74, - "learning_rate": 9.601222500816679e-05, - "loss": 3.1869, - "step": 42775 - }, - { - "epoch": 0.74, - "learning_rate": 9.595191183696073e-05, - "loss": 3.0711, - "step": 42780 - }, - { - "epoch": 0.74, - "learning_rate": 9.589161400912524e-05, - "loss": 3.069, - "step": 42785 - }, - { - "epoch": 0.74, - "learning_rate": 9.583133152919437e-05, - "loss": 3.1541, - "step": 42790 - }, - { - "epoch": 0.74, - "learning_rate": 9.577106440170101e-05, - "loss": 3.1853, - "step": 42795 - }, - { - "epoch": 0.74, - "learning_rate": 9.571081263117695e-05, - "loss": 3.1427, - "step": 42800 - }, - { - "epoch": 0.74, - "learning_rate": 9.565057622215274e-05, - "loss": 3.0839, - "step": 42805 - }, - { - "epoch": 0.74, - "learning_rate": 9.5590355179158e-05, - "loss": 3.1947, - "step": 42810 - }, - { - "epoch": 0.74, - "learning_rate": 9.553014950672097e-05, - "loss": 3.1567, - "step": 42815 - }, - { - "epoch": 0.74, - "learning_rate": 9.54699592093688e-05, - "loss": 3.2465, - "step": 42820 - }, - { - "epoch": 0.74, - "learning_rate": 9.540978429162751e-05, - "loss": 3.1557, - "step": 42825 - }, - { - "epoch": 0.74, - "learning_rate": 9.534962475802193e-05, - "loss": 3.0594, - "step": 42830 - }, - { - "epoch": 0.74, - "learning_rate": 9.528948061307565e-05, - "loss": 3.0756, - "step": 42835 - }, - { - "epoch": 0.74, - "learning_rate": 9.522935186131141e-05, - "loss": 3.2361, - "step": 42840 - }, - { - "epoch": 0.74, - "learning_rate": 9.516923850725044e-05, - "loss": 3.1698, - "step": 42845 - }, - { - "epoch": 0.74, - "learning_rate": 9.510914055541304e-05, - "loss": 3.2094, - "step": 42850 - }, - { - "epoch": 0.74, - "learning_rate": 9.504905801031819e-05, - "loss": 3.1183, - "step": 42855 - }, - { - "epoch": 0.74, - "learning_rate": 9.498899087648373e-05, - "loss": 3.207, - "step": 42860 - }, - { - "epoch": 0.74, - "learning_rate": 9.492893915842661e-05, - "loss": 3.1721, - "step": 42865 - }, - { - "epoch": 0.74, - "learning_rate": 9.486890286066229e-05, - "loss": 3.2029, - "step": 42870 - }, - { - "epoch": 0.74, - "learning_rate": 9.480888198770516e-05, - "loss": 3.1417, - "step": 42875 - }, - { - "epoch": 0.74, - "learning_rate": 9.474887654406857e-05, - "loss": 3.1568, - "step": 42880 - }, - { - "epoch": 0.74, - "learning_rate": 9.468888653426444e-05, - "loss": 3.1464, - "step": 42885 - }, - { - "epoch": 0.74, - "learning_rate": 9.462891196280393e-05, - "loss": 3.179, - "step": 42890 - }, - { - "epoch": 0.74, - "learning_rate": 9.456895283419674e-05, - "loss": 3.1786, - "step": 42895 - }, - { - "epoch": 0.74, - "learning_rate": 9.450900915295147e-05, - "loss": 3.0743, - "step": 42900 - }, - { - "epoch": 0.74, - "learning_rate": 9.444908092357558e-05, - "loss": 3.1999, - "step": 42905 - }, - { - "epoch": 0.74, - "learning_rate": 9.438916815057541e-05, - "loss": 3.1467, - "step": 42910 - }, - { - "epoch": 0.74, - "learning_rate": 9.432927083845592e-05, - "loss": 3.201, - "step": 42915 - }, - { - "epoch": 0.74, - "learning_rate": 9.426938899172132e-05, - "loss": 3.2788, - "step": 42920 - }, - { - "epoch": 0.74, - "learning_rate": 9.420952261487434e-05, - "loss": 3.2115, - "step": 42925 - }, - { - "epoch": 0.74, - "learning_rate": 9.41496717124166e-05, - "loss": 3.1825, - "step": 42930 - }, - { - "epoch": 0.74, - "learning_rate": 9.408983628884856e-05, - "loss": 3.1825, - "step": 42935 - }, - { - "epoch": 0.74, - "learning_rate": 9.403001634866948e-05, - "loss": 3.0352, - "step": 42940 - }, - { - "epoch": 0.74, - "learning_rate": 9.397021189637765e-05, - "loss": 3.0817, - "step": 42945 - }, - { - "epoch": 0.74, - "learning_rate": 9.391042293647012e-05, - "loss": 3.1235, - "step": 42950 - }, - { - "epoch": 0.74, - "learning_rate": 9.385064947344241e-05, - "loss": 3.1679, - "step": 42955 - }, - { - "epoch": 0.74, - "learning_rate": 9.379089151178945e-05, - "loss": 3.1045, - "step": 42960 - }, - { - "epoch": 0.74, - "learning_rate": 9.373114905600464e-05, - "loss": 3.146, - "step": 42965 - }, - { - "epoch": 0.74, - "learning_rate": 9.367142211058023e-05, - "loss": 3.2207, - "step": 42970 - }, - { - "epoch": 0.74, - "learning_rate": 9.361171068000762e-05, - "loss": 3.2523, - "step": 42975 - }, - { - "epoch": 0.74, - "learning_rate": 9.355201476877647e-05, - "loss": 3.078, - "step": 42980 - }, - { - "epoch": 0.74, - "learning_rate": 9.349233438137589e-05, - "loss": 3.1585, - "step": 42985 - }, - { - "epoch": 0.74, - "learning_rate": 9.343266952229341e-05, - "loss": 3.171, - "step": 42990 - }, - { - "epoch": 0.74, - "learning_rate": 9.337302019601545e-05, - "loss": 3.2222, - "step": 42995 - }, - { - "epoch": 0.74, - "learning_rate": 9.331338640702764e-05, - "loss": 3.1117, - "step": 43000 - }, - { - "epoch": 0.74, - "eval_loss": 3.1977977752685547, - "eval_runtime": 149.6717, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 43000 - }, - { - "epoch": 0.74, - "learning_rate": 9.32537681598137e-05, - "loss": 3.2042, - "step": 43005 - }, - { - "epoch": 0.74, - "learning_rate": 9.319416545885693e-05, - "loss": 3.1313, - "step": 43010 - }, - { - "epoch": 0.74, - "learning_rate": 9.313457830863906e-05, - "loss": 3.169, - "step": 43015 - }, - { - "epoch": 0.74, - "learning_rate": 9.307500671364064e-05, - "loss": 3.136, - "step": 43020 - }, - { - "epoch": 0.74, - "learning_rate": 9.301545067834142e-05, - "loss": 3.1929, - "step": 43025 - }, - { - "epoch": 0.74, - "learning_rate": 9.295591020721944e-05, - "loss": 3.1781, - "step": 43030 - }, - { - "epoch": 0.74, - "learning_rate": 9.289638530475186e-05, - "loss": 3.1836, - "step": 43035 - }, - { - "epoch": 0.74, - "learning_rate": 9.283687597541477e-05, - "loss": 3.1999, - "step": 43040 - }, - { - "epoch": 0.74, - "learning_rate": 9.277738222368294e-05, - "loss": 3.0913, - "step": 43045 - }, - { - "epoch": 0.75, - "learning_rate": 9.271790405402996e-05, - "loss": 3.2435, - "step": 43050 - }, - { - "epoch": 0.75, - "learning_rate": 9.265844147092832e-05, - "loss": 3.295, - "step": 43055 - }, - { - "epoch": 0.75, - "learning_rate": 9.259899447884915e-05, - "loss": 3.1696, - "step": 43060 - }, - { - "epoch": 0.75, - "learning_rate": 9.253956308226277e-05, - "loss": 3.0823, - "step": 43065 - }, - { - "epoch": 0.75, - "learning_rate": 9.248014728563801e-05, - "loss": 3.1608, - "step": 43070 - }, - { - "epoch": 0.75, - "learning_rate": 9.242074709344258e-05, - "loss": 3.1251, - "step": 43075 - }, - { - "epoch": 0.75, - "learning_rate": 9.236136251014336e-05, - "loss": 3.1621, - "step": 43080 - }, - { - "epoch": 0.75, - "learning_rate": 9.230199354020533e-05, - "loss": 3.2057, - "step": 43085 - }, - { - "epoch": 0.75, - "learning_rate": 9.224264018809304e-05, - "loss": 3.1949, - "step": 43090 - }, - { - "epoch": 0.75, - "learning_rate": 9.218330245826948e-05, - "loss": 3.1971, - "step": 43095 - }, - { - "epoch": 0.75, - "learning_rate": 9.212398035519641e-05, - "loss": 3.0869, - "step": 43100 - }, - { - "epoch": 0.75, - "learning_rate": 9.206467388333491e-05, - "loss": 3.093, - "step": 43105 - }, - { - "epoch": 0.75, - "learning_rate": 9.200538304714416e-05, - "loss": 3.1078, - "step": 43110 - }, - { - "epoch": 0.75, - "learning_rate": 9.194610785108253e-05, - "loss": 3.2347, - "step": 43115 - }, - { - "epoch": 0.75, - "learning_rate": 9.188684829960746e-05, - "loss": 3.1788, - "step": 43120 - }, - { - "epoch": 0.75, - "learning_rate": 9.182760439717482e-05, - "loss": 3.1977, - "step": 43125 - }, - { - "epoch": 0.75, - "learning_rate": 9.176837614823945e-05, - "loss": 3.1689, - "step": 43130 - }, - { - "epoch": 0.75, - "learning_rate": 9.170916355725505e-05, - "loss": 3.1488, - "step": 43135 - }, - { - "epoch": 0.75, - "learning_rate": 9.164996662867398e-05, - "loss": 3.1792, - "step": 43140 - }, - { - "epoch": 0.75, - "learning_rate": 9.159078536694774e-05, - "loss": 3.1396, - "step": 43145 - }, - { - "epoch": 0.75, - "learning_rate": 9.153161977652634e-05, - "loss": 3.1108, - "step": 43150 - }, - { - "epoch": 0.75, - "learning_rate": 9.147246986185874e-05, - "loss": 3.0777, - "step": 43155 - }, - { - "epoch": 0.75, - "learning_rate": 9.141333562739275e-05, - "loss": 3.2488, - "step": 43160 - }, - { - "epoch": 0.75, - "learning_rate": 9.135421707757483e-05, - "loss": 3.2371, - "step": 43165 - }, - { - "epoch": 0.75, - "learning_rate": 9.129511421685058e-05, - "loss": 3.1491, - "step": 43170 - }, - { - "epoch": 0.75, - "learning_rate": 9.123602704966415e-05, - "loss": 3.2448, - "step": 43175 - }, - { - "epoch": 0.75, - "learning_rate": 9.117695558045858e-05, - "loss": 3.1201, - "step": 43180 - }, - { - "epoch": 0.75, - "learning_rate": 9.111789981367576e-05, - "loss": 3.1906, - "step": 43185 - }, - { - "epoch": 0.75, - "learning_rate": 9.105885975375637e-05, - "loss": 3.0943, - "step": 43190 - }, - { - "epoch": 0.75, - "learning_rate": 9.099983540513981e-05, - "loss": 3.2098, - "step": 43195 - }, - { - "epoch": 0.75, - "learning_rate": 9.094082677226462e-05, - "loss": 3.2461, - "step": 43200 - }, - { - "epoch": 0.75, - "learning_rate": 9.088183385956785e-05, - "loss": 3.2476, - "step": 43205 - }, - { - "epoch": 0.75, - "learning_rate": 9.082285667148545e-05, - "loss": 3.1644, - "step": 43210 - }, - { - "epoch": 0.75, - "learning_rate": 9.076389521245225e-05, - "loss": 3.2083, - "step": 43215 - }, - { - "epoch": 0.75, - "learning_rate": 9.070494948690165e-05, - "loss": 3.2944, - "step": 43220 - }, - { - "epoch": 0.75, - "learning_rate": 9.064601949926638e-05, - "loss": 3.3015, - "step": 43225 - }, - { - "epoch": 0.75, - "learning_rate": 9.058710525397748e-05, - "loss": 3.0546, - "step": 43230 - }, - { - "epoch": 0.75, - "learning_rate": 9.052820675546508e-05, - "loss": 3.1223, - "step": 43235 - }, - { - "epoch": 0.75, - "learning_rate": 9.046932400815798e-05, - "loss": 3.1651, - "step": 43240 - }, - { - "epoch": 0.75, - "learning_rate": 9.041045701648382e-05, - "loss": 2.9694, - "step": 43245 - }, - { - "epoch": 0.75, - "learning_rate": 9.035160578486924e-05, - "loss": 3.0904, - "step": 43250 - }, - { - "epoch": 0.75, - "learning_rate": 9.029277031773958e-05, - "loss": 3.2837, - "step": 43255 - }, - { - "epoch": 0.75, - "learning_rate": 9.023395061951864e-05, - "loss": 3.1519, - "step": 43260 - }, - { - "epoch": 0.75, - "learning_rate": 9.017514669462969e-05, - "loss": 3.2259, - "step": 43265 - }, - { - "epoch": 0.75, - "learning_rate": 9.011635854749439e-05, - "loss": 3.1521, - "step": 43270 - }, - { - "epoch": 0.75, - "learning_rate": 9.005758618253316e-05, - "loss": 3.1211, - "step": 43275 - }, - { - "epoch": 0.75, - "learning_rate": 8.999882960416572e-05, - "loss": 3.2336, - "step": 43280 - }, - { - "epoch": 0.75, - "learning_rate": 8.994008881680989e-05, - "loss": 3.1052, - "step": 43285 - }, - { - "epoch": 0.75, - "learning_rate": 8.988136382488292e-05, - "loss": 3.1502, - "step": 43290 - }, - { - "epoch": 0.75, - "learning_rate": 8.982265463280054e-05, - "loss": 3.1397, - "step": 43295 - }, - { - "epoch": 0.75, - "learning_rate": 8.976396124497731e-05, - "loss": 3.2139, - "step": 43300 - }, - { - "epoch": 0.75, - "learning_rate": 8.97052836658268e-05, - "loss": 3.1242, - "step": 43305 - }, - { - "epoch": 0.75, - "learning_rate": 8.964662189976128e-05, - "loss": 3.2127, - "step": 43310 - }, - { - "epoch": 0.75, - "learning_rate": 8.95879759511917e-05, - "loss": 3.1406, - "step": 43315 - }, - { - "epoch": 0.75, - "learning_rate": 8.9529345824528e-05, - "loss": 3.1651, - "step": 43320 - }, - { - "epoch": 0.75, - "learning_rate": 8.947073152417877e-05, - "loss": 3.1555, - "step": 43325 - }, - { - "epoch": 0.75, - "learning_rate": 8.941213305455166e-05, - "loss": 3.2542, - "step": 43330 - }, - { - "epoch": 0.75, - "learning_rate": 8.9353550420053e-05, - "loss": 3.2307, - "step": 43335 - }, - { - "epoch": 0.75, - "learning_rate": 8.929498362508762e-05, - "loss": 3.239, - "step": 43340 - }, - { - "epoch": 0.75, - "learning_rate": 8.92364326740597e-05, - "loss": 3.096, - "step": 43345 - }, - { - "epoch": 0.75, - "learning_rate": 8.91778975713719e-05, - "loss": 3.2674, - "step": 43350 - }, - { - "epoch": 0.75, - "learning_rate": 8.911937832142569e-05, - "loss": 3.0949, - "step": 43355 - }, - { - "epoch": 0.75, - "learning_rate": 8.906087492862166e-05, - "loss": 3.157, - "step": 43360 - }, - { - "epoch": 0.75, - "learning_rate": 8.900238739735859e-05, - "loss": 3.1264, - "step": 43365 - }, - { - "epoch": 0.75, - "learning_rate": 8.894391573203475e-05, - "loss": 3.2266, - "step": 43370 - }, - { - "epoch": 0.75, - "learning_rate": 8.888545993704678e-05, - "loss": 3.1276, - "step": 43375 - }, - { - "epoch": 0.75, - "learning_rate": 8.882702001679017e-05, - "loss": 3.1758, - "step": 43380 - }, - { - "epoch": 0.75, - "learning_rate": 8.876859597565965e-05, - "loss": 3.1014, - "step": 43385 - }, - { - "epoch": 0.75, - "learning_rate": 8.871018781804796e-05, - "loss": 3.261, - "step": 43390 - }, - { - "epoch": 0.75, - "learning_rate": 8.865179554834739e-05, - "loss": 3.098, - "step": 43395 - }, - { - "epoch": 0.75, - "learning_rate": 8.859341917094863e-05, - "loss": 3.3551, - "step": 43400 - }, - { - "epoch": 0.75, - "learning_rate": 8.853505869024127e-05, - "loss": 3.2433, - "step": 43405 - }, - { - "epoch": 0.75, - "learning_rate": 8.847671411061389e-05, - "loss": 3.1784, - "step": 43410 - }, - { - "epoch": 0.75, - "learning_rate": 8.841838543645353e-05, - "loss": 3.1108, - "step": 43415 - }, - { - "epoch": 0.75, - "learning_rate": 8.83600726721461e-05, - "loss": 3.2022, - "step": 43420 - }, - { - "epoch": 0.75, - "learning_rate": 8.830177582207669e-05, - "loss": 3.0931, - "step": 43425 - }, - { - "epoch": 0.75, - "learning_rate": 8.824349489062876e-05, - "loss": 3.24, - "step": 43430 - }, - { - "epoch": 0.75, - "learning_rate": 8.818522988218483e-05, - "loss": 3.1067, - "step": 43435 - }, - { - "epoch": 0.75, - "learning_rate": 8.812698080112607e-05, - "loss": 3.0216, - "step": 43440 - }, - { - "epoch": 0.75, - "learning_rate": 8.806874765183241e-05, - "loss": 3.09, - "step": 43445 - }, - { - "epoch": 0.75, - "learning_rate": 8.80105304386829e-05, - "loss": 3.1525, - "step": 43450 - }, - { - "epoch": 0.75, - "learning_rate": 8.795232916605506e-05, - "loss": 3.2424, - "step": 43455 - }, - { - "epoch": 0.75, - "learning_rate": 8.789414383832535e-05, - "loss": 3.2136, - "step": 43460 - }, - { - "epoch": 0.75, - "learning_rate": 8.783597445986901e-05, - "loss": 3.1622, - "step": 43465 - }, - { - "epoch": 0.75, - "learning_rate": 8.777782103506007e-05, - "loss": 3.2465, - "step": 43470 - }, - { - "epoch": 0.75, - "learning_rate": 8.771968356827123e-05, - "loss": 3.1255, - "step": 43475 - }, - { - "epoch": 0.75, - "learning_rate": 8.766156206387442e-05, - "loss": 3.1675, - "step": 43480 - }, - { - "epoch": 0.75, - "learning_rate": 8.760345652623987e-05, - "loss": 3.1265, - "step": 43485 - }, - { - "epoch": 0.75, - "learning_rate": 8.75453669597369e-05, - "loss": 3.0942, - "step": 43490 - }, - { - "epoch": 0.75, - "learning_rate": 8.748729336873353e-05, - "loss": 3.1477, - "step": 43495 - }, - { - "epoch": 0.75, - "learning_rate": 8.742923575759649e-05, - "loss": 3.1318, - "step": 43500 - }, - { - "epoch": 0.75, - "eval_loss": 3.1929843425750732, - "eval_runtime": 150.078, - "eval_samples_per_second": 12.267, - "eval_steps_per_second": 0.773, - "step": 43500 - }, - { - "epoch": 0.75, - "learning_rate": 8.737119413069158e-05, - "loss": 3.221, - "step": 43505 - }, - { - "epoch": 0.75, - "learning_rate": 8.73131684923832e-05, - "loss": 3.0582, - "step": 43510 - }, - { - "epoch": 0.75, - "learning_rate": 8.725515884703453e-05, - "loss": 3.1917, - "step": 43515 - }, - { - "epoch": 0.75, - "learning_rate": 8.719716519900763e-05, - "loss": 3.1326, - "step": 43520 - }, - { - "epoch": 0.75, - "learning_rate": 8.713918755266321e-05, - "loss": 3.0365, - "step": 43525 - }, - { - "epoch": 0.75, - "learning_rate": 8.708122591236109e-05, - "loss": 3.181, - "step": 43530 - }, - { - "epoch": 0.75, - "learning_rate": 8.702328028245956e-05, - "loss": 3.2332, - "step": 43535 - }, - { - "epoch": 0.75, - "learning_rate": 8.69653506673159e-05, - "loss": 3.1744, - "step": 43540 - }, - { - "epoch": 0.75, - "learning_rate": 8.690743707128605e-05, - "loss": 3.0682, - "step": 43545 - }, - { - "epoch": 0.75, - "learning_rate": 8.68495394987249e-05, - "loss": 3.3009, - "step": 43550 - }, - { - "epoch": 0.75, - "learning_rate": 8.679165795398587e-05, - "loss": 3.126, - "step": 43555 - }, - { - "epoch": 0.75, - "learning_rate": 8.67337924414216e-05, - "loss": 3.2137, - "step": 43560 - }, - { - "epoch": 0.75, - "learning_rate": 8.667594296538315e-05, - "loss": 3.1615, - "step": 43565 - }, - { - "epoch": 0.75, - "learning_rate": 8.661810953022052e-05, - "loss": 3.1869, - "step": 43570 - }, - { - "epoch": 0.75, - "learning_rate": 8.656029214028249e-05, - "loss": 3.1582, - "step": 43575 - }, - { - "epoch": 0.75, - "learning_rate": 8.650249079991654e-05, - "loss": 3.0512, - "step": 43580 - }, - { - "epoch": 0.75, - "learning_rate": 8.644470551346921e-05, - "loss": 3.1223, - "step": 43585 - }, - { - "epoch": 0.75, - "learning_rate": 8.638693628528559e-05, - "loss": 3.2332, - "step": 43590 - }, - { - "epoch": 0.75, - "learning_rate": 8.63291831197096e-05, - "loss": 3.0745, - "step": 43595 - }, - { - "epoch": 0.75, - "learning_rate": 8.627144602108399e-05, - "loss": 3.1756, - "step": 43600 - }, - { - "epoch": 0.75, - "learning_rate": 8.621372499375021e-05, - "loss": 3.2514, - "step": 43605 - }, - { - "epoch": 0.75, - "learning_rate": 8.615602004204876e-05, - "loss": 3.1066, - "step": 43610 - }, - { - "epoch": 0.75, - "learning_rate": 8.609833117031879e-05, - "loss": 3.1709, - "step": 43615 - }, - { - "epoch": 0.75, - "learning_rate": 8.604065838289788e-05, - "loss": 3.0782, - "step": 43620 - }, - { - "epoch": 0.75, - "learning_rate": 8.598300168412301e-05, - "loss": 3.1607, - "step": 43625 - }, - { - "epoch": 0.76, - "learning_rate": 8.592536107832962e-05, - "loss": 3.1411, - "step": 43630 - }, - { - "epoch": 0.76, - "learning_rate": 8.586773656985185e-05, - "loss": 3.0961, - "step": 43635 - }, - { - "epoch": 0.76, - "learning_rate": 8.581012816302309e-05, - "loss": 3.202, - "step": 43640 - }, - { - "epoch": 0.76, - "learning_rate": 8.575253586217478e-05, - "loss": 3.2263, - "step": 43645 - }, - { - "epoch": 0.76, - "learning_rate": 8.569495967163786e-05, - "loss": 3.0951, - "step": 43650 - }, - { - "epoch": 0.76, - "learning_rate": 8.56373995957417e-05, - "loss": 3.1947, - "step": 43655 - }, - { - "epoch": 0.76, - "learning_rate": 8.557985563881439e-05, - "loss": 3.2083, - "step": 43660 - }, - { - "epoch": 0.76, - "learning_rate": 8.552232780518324e-05, - "loss": 3.169, - "step": 43665 - }, - { - "epoch": 0.76, - "learning_rate": 8.546481609917367e-05, - "loss": 3.1267, - "step": 43670 - }, - { - "epoch": 0.76, - "learning_rate": 8.540732052511058e-05, - "loss": 3.288, - "step": 43675 - }, - { - "epoch": 0.76, - "learning_rate": 8.534984108731717e-05, - "loss": 3.0646, - "step": 43680 - }, - { - "epoch": 0.76, - "learning_rate": 8.529237779011557e-05, - "loss": 3.2094, - "step": 43685 - }, - { - "epoch": 0.76, - "learning_rate": 8.523493063782699e-05, - "loss": 3.1029, - "step": 43690 - }, - { - "epoch": 0.76, - "learning_rate": 8.517749963477087e-05, - "loss": 3.1713, - "step": 43695 - }, - { - "epoch": 0.76, - "learning_rate": 8.512008478526575e-05, - "loss": 3.1246, - "step": 43700 - }, - { - "epoch": 0.76, - "learning_rate": 8.50626860936291e-05, - "loss": 3.1438, - "step": 43705 - }, - { - "epoch": 0.76, - "learning_rate": 8.500530356417692e-05, - "loss": 3.2288, - "step": 43710 - }, - { - "epoch": 0.76, - "learning_rate": 8.494793720122407e-05, - "loss": 3.049, - "step": 43715 - }, - { - "epoch": 0.76, - "learning_rate": 8.489058700908424e-05, - "loss": 3.1557, - "step": 43720 - }, - { - "epoch": 0.76, - "learning_rate": 8.483325299206972e-05, - "loss": 3.1421, - "step": 43725 - }, - { - "epoch": 0.76, - "learning_rate": 8.4775935154492e-05, - "loss": 3.1228, - "step": 43730 - }, - { - "epoch": 0.76, - "learning_rate": 8.47186335006609e-05, - "loss": 3.2315, - "step": 43735 - }, - { - "epoch": 0.76, - "learning_rate": 8.466134803488532e-05, - "loss": 3.2098, - "step": 43740 - }, - { - "epoch": 0.76, - "learning_rate": 8.460407876147273e-05, - "loss": 3.0859, - "step": 43745 - }, - { - "epoch": 0.76, - "learning_rate": 8.454682568472946e-05, - "loss": 3.1922, - "step": 43750 - }, - { - "epoch": 0.76, - "learning_rate": 8.44895888089608e-05, - "loss": 3.1085, - "step": 43755 - }, - { - "epoch": 0.76, - "learning_rate": 8.44323681384706e-05, - "loss": 3.1564, - "step": 43760 - }, - { - "epoch": 0.76, - "learning_rate": 8.437516367756153e-05, - "loss": 3.2095, - "step": 43765 - }, - { - "epoch": 0.76, - "learning_rate": 8.431797543053512e-05, - "loss": 3.2012, - "step": 43770 - }, - { - "epoch": 0.76, - "learning_rate": 8.426080340169158e-05, - "loss": 3.1939, - "step": 43775 - }, - { - "epoch": 0.76, - "learning_rate": 8.42036475953299e-05, - "loss": 3.1871, - "step": 43780 - }, - { - "epoch": 0.76, - "learning_rate": 8.414650801574806e-05, - "loss": 3.1464, - "step": 43785 - }, - { - "epoch": 0.76, - "learning_rate": 8.408938466724257e-05, - "loss": 3.2299, - "step": 43790 - }, - { - "epoch": 0.76, - "learning_rate": 8.403227755410887e-05, - "loss": 3.1247, - "step": 43795 - }, - { - "epoch": 0.76, - "learning_rate": 8.397518668064109e-05, - "loss": 3.1662, - "step": 43800 - }, - { - "epoch": 0.76, - "learning_rate": 8.391811205113204e-05, - "loss": 3.1429, - "step": 43805 - }, - { - "epoch": 0.76, - "learning_rate": 8.386105366987363e-05, - "loss": 3.1746, - "step": 43810 - }, - { - "epoch": 0.76, - "learning_rate": 8.380401154115631e-05, - "loss": 3.2565, - "step": 43815 - }, - { - "epoch": 0.76, - "learning_rate": 8.374698566926937e-05, - "loss": 3.0521, - "step": 43820 - }, - { - "epoch": 0.76, - "learning_rate": 8.368997605850078e-05, - "loss": 3.1862, - "step": 43825 - }, - { - "epoch": 0.76, - "learning_rate": 8.363298271313735e-05, - "loss": 3.1848, - "step": 43830 - }, - { - "epoch": 0.76, - "learning_rate": 8.357600563746485e-05, - "loss": 3.1177, - "step": 43835 - }, - { - "epoch": 0.76, - "learning_rate": 8.351904483576758e-05, - "loss": 3.1069, - "step": 43840 - }, - { - "epoch": 0.76, - "learning_rate": 8.346210031232866e-05, - "loss": 3.2112, - "step": 43845 - }, - { - "epoch": 0.76, - "learning_rate": 8.340517207143007e-05, - "loss": 3.0668, - "step": 43850 - }, - { - "epoch": 0.76, - "learning_rate": 8.334826011735252e-05, - "loss": 3.0612, - "step": 43855 - }, - { - "epoch": 0.76, - "learning_rate": 8.32913644543754e-05, - "loss": 3.2284, - "step": 43860 - }, - { - "epoch": 0.76, - "learning_rate": 8.323448508677713e-05, - "loss": 3.1964, - "step": 43865 - }, - { - "epoch": 0.76, - "learning_rate": 8.317762201883468e-05, - "loss": 3.1696, - "step": 43870 - }, - { - "epoch": 0.76, - "learning_rate": 8.312077525482387e-05, - "loss": 3.0291, - "step": 43875 - }, - { - "epoch": 0.76, - "learning_rate": 8.306394479901928e-05, - "loss": 3.0917, - "step": 43880 - }, - { - "epoch": 0.76, - "learning_rate": 8.300713065569415e-05, - "loss": 3.1189, - "step": 43885 - }, - { - "epoch": 0.76, - "learning_rate": 8.295033282912081e-05, - "loss": 3.2172, - "step": 43890 - }, - { - "epoch": 0.76, - "learning_rate": 8.289355132357011e-05, - "loss": 3.213, - "step": 43895 - }, - { - "epoch": 0.76, - "learning_rate": 8.283678614331165e-05, - "loss": 3.2169, - "step": 43900 - }, - { - "epoch": 0.76, - "learning_rate": 8.278003729261396e-05, - "loss": 3.0397, - "step": 43905 - }, - { - "epoch": 0.76, - "learning_rate": 8.272330477574414e-05, - "loss": 3.144, - "step": 43910 - }, - { - "epoch": 0.76, - "learning_rate": 8.266658859696835e-05, - "loss": 3.0974, - "step": 43915 - }, - { - "epoch": 0.76, - "learning_rate": 8.26098887605514e-05, - "loss": 3.171, - "step": 43920 - }, - { - "epoch": 0.76, - "learning_rate": 8.255320527075651e-05, - "loss": 3.1021, - "step": 43925 - }, - { - "epoch": 0.76, - "learning_rate": 8.249653813184628e-05, - "loss": 3.2077, - "step": 43930 - }, - { - "epoch": 0.76, - "learning_rate": 8.243988734808168e-05, - "loss": 3.1471, - "step": 43935 - }, - { - "epoch": 0.76, - "learning_rate": 8.23832529237225e-05, - "loss": 3.0669, - "step": 43940 - }, - { - "epoch": 0.76, - "learning_rate": 8.232663486302761e-05, - "loss": 3.1868, - "step": 43945 - }, - { - "epoch": 0.76, - "learning_rate": 8.227003317025403e-05, - "loss": 3.2153, - "step": 43950 - }, - { - "epoch": 0.76, - "learning_rate": 8.22134478496582e-05, - "loss": 3.091, - "step": 43955 - }, - { - "epoch": 0.76, - "learning_rate": 8.215687890549499e-05, - "loss": 3.1243, - "step": 43960 - }, - { - "epoch": 0.76, - "learning_rate": 8.210032634201792e-05, - "loss": 3.2038, - "step": 43965 - }, - { - "epoch": 0.76, - "learning_rate": 8.204379016347979e-05, - "loss": 3.2283, - "step": 43970 - }, - { - "epoch": 0.76, - "learning_rate": 8.198727037413156e-05, - "loss": 3.2619, - "step": 43975 - }, - { - "epoch": 0.76, - "learning_rate": 8.193076697822316e-05, - "loss": 3.1312, - "step": 43980 - }, - { - "epoch": 0.76, - "learning_rate": 8.187427998000364e-05, - "loss": 3.0785, - "step": 43985 - }, - { - "epoch": 0.76, - "learning_rate": 8.181780938372037e-05, - "loss": 2.9717, - "step": 43990 - }, - { - "epoch": 0.76, - "learning_rate": 8.176135519361964e-05, - "loss": 3.0844, - "step": 43995 - }, - { - "epoch": 0.76, - "learning_rate": 8.170491741394658e-05, - "loss": 3.2462, - "step": 44000 - }, - { - "epoch": 0.76, - "eval_loss": 3.190524101257324, - "eval_runtime": 149.6752, - "eval_samples_per_second": 12.3, - "eval_steps_per_second": 0.775, - "step": 44000 - }, - { - "epoch": 0.76, - "learning_rate": 8.164849604894487e-05, - "loss": 3.2107, - "step": 44005 - }, - { - "epoch": 0.76, - "learning_rate": 8.159209110285734e-05, - "loss": 3.1158, - "step": 44010 - }, - { - "epoch": 0.76, - "learning_rate": 8.153570257992521e-05, - "loss": 3.1124, - "step": 44015 - }, - { - "epoch": 0.76, - "learning_rate": 8.147933048438856e-05, - "loss": 3.2516, - "step": 44020 - }, - { - "epoch": 0.76, - "learning_rate": 8.142297482048653e-05, - "loss": 3.1914, - "step": 44025 - }, - { - "epoch": 0.76, - "learning_rate": 8.136663559245643e-05, - "loss": 3.1275, - "step": 44030 - }, - { - "epoch": 0.76, - "learning_rate": 8.131031280453494e-05, - "loss": 3.1475, - "step": 44035 - }, - { - "epoch": 0.76, - "learning_rate": 8.125400646095717e-05, - "loss": 3.2376, - "step": 44040 - }, - { - "epoch": 0.76, - "learning_rate": 8.119771656595696e-05, - "loss": 3.1365, - "step": 44045 - }, - { - "epoch": 0.76, - "learning_rate": 8.114144312376731e-05, - "loss": 3.1429, - "step": 44050 - }, - { - "epoch": 0.76, - "learning_rate": 8.108518613861941e-05, - "loss": 3.1624, - "step": 44055 - }, - { - "epoch": 0.76, - "learning_rate": 8.102894561474349e-05, - "loss": 3.1822, - "step": 44060 - }, - { - "epoch": 0.76, - "learning_rate": 8.097272155636877e-05, - "loss": 3.0874, - "step": 44065 - }, - { - "epoch": 0.76, - "learning_rate": 8.091651396772286e-05, - "loss": 3.232, - "step": 44070 - }, - { - "epoch": 0.76, - "learning_rate": 8.086032285303232e-05, - "loss": 3.2127, - "step": 44075 - }, - { - "epoch": 0.76, - "learning_rate": 8.080414821652244e-05, - "loss": 3.2376, - "step": 44080 - }, - { - "epoch": 0.76, - "learning_rate": 8.074799006241716e-05, - "loss": 3.156, - "step": 44085 - }, - { - "epoch": 0.76, - "learning_rate": 8.069184839493947e-05, - "loss": 3.232, - "step": 44090 - }, - { - "epoch": 0.76, - "learning_rate": 8.063572321831084e-05, - "loss": 3.0889, - "step": 44095 - }, - { - "epoch": 0.76, - "learning_rate": 8.05796145367516e-05, - "loss": 3.2148, - "step": 44100 - }, - { - "epoch": 0.76, - "learning_rate": 8.052352235448084e-05, - "loss": 3.1608, - "step": 44105 - }, - { - "epoch": 0.76, - "learning_rate": 8.046744667571629e-05, - "loss": 3.0584, - "step": 44110 - }, - { - "epoch": 0.76, - "learning_rate": 8.041138750467478e-05, - "loss": 3.2204, - "step": 44115 - }, - { - "epoch": 0.76, - "learning_rate": 8.035534484557153e-05, - "loss": 3.0466, - "step": 44120 - }, - { - "epoch": 0.76, - "learning_rate": 8.029931870262073e-05, - "loss": 3.1303, - "step": 44125 - }, - { - "epoch": 0.76, - "learning_rate": 8.024330908003516e-05, - "loss": 3.1986, - "step": 44130 - }, - { - "epoch": 0.76, - "learning_rate": 8.018731598202655e-05, - "loss": 3.1084, - "step": 44135 - }, - { - "epoch": 0.76, - "learning_rate": 8.013133941280516e-05, - "loss": 3.2342, - "step": 44140 - }, - { - "epoch": 0.76, - "learning_rate": 8.007537937658032e-05, - "loss": 3.1171, - "step": 44145 - }, - { - "epoch": 0.76, - "learning_rate": 8.001943587755984e-05, - "loss": 3.1854, - "step": 44150 - }, - { - "epoch": 0.76, - "learning_rate": 7.996350891995045e-05, - "loss": 3.175, - "step": 44155 - }, - { - "epoch": 0.76, - "learning_rate": 7.990759850795746e-05, - "loss": 3.2055, - "step": 44160 - }, - { - "epoch": 0.76, - "learning_rate": 7.985170464578506e-05, - "loss": 3.0755, - "step": 44165 - }, - { - "epoch": 0.76, - "learning_rate": 7.97958273376363e-05, - "loss": 3.0841, - "step": 44170 - }, - { - "epoch": 0.76, - "learning_rate": 7.97399665877128e-05, - "loss": 3.1463, - "step": 44175 - }, - { - "epoch": 0.76, - "learning_rate": 7.9684122400215e-05, - "loss": 3.1655, - "step": 44180 - }, - { - "epoch": 0.76, - "learning_rate": 7.962829477934207e-05, - "loss": 3.1207, - "step": 44185 - }, - { - "epoch": 0.76, - "learning_rate": 7.957248372929192e-05, - "loss": 3.1037, - "step": 44190 - }, - { - "epoch": 0.76, - "learning_rate": 7.95166892542614e-05, - "loss": 3.125, - "step": 44195 - }, - { - "epoch": 0.76, - "learning_rate": 7.946091135844601e-05, - "loss": 3.1905, - "step": 44200 - }, - { - "epoch": 0.77, - "learning_rate": 7.940515004603963e-05, - "loss": 3.2434, - "step": 44205 - }, - { - "epoch": 0.77, - "learning_rate": 7.934940532123552e-05, - "loss": 3.1113, - "step": 44210 - }, - { - "epoch": 0.77, - "learning_rate": 7.929367718822534e-05, - "loss": 3.2207, - "step": 44215 - }, - { - "epoch": 0.77, - "learning_rate": 7.923796565119941e-05, - "loss": 3.1306, - "step": 44220 - }, - { - "epoch": 0.77, - "learning_rate": 7.918227071434729e-05, - "loss": 3.2167, - "step": 44225 - }, - { - "epoch": 0.77, - "learning_rate": 7.912659238185654e-05, - "loss": 3.0545, - "step": 44230 - }, - { - "epoch": 0.77, - "learning_rate": 7.907093065791418e-05, - "loss": 3.1117, - "step": 44235 - }, - { - "epoch": 0.77, - "learning_rate": 7.90152855467056e-05, - "loss": 3.2648, - "step": 44240 - }, - { - "epoch": 0.77, - "learning_rate": 7.895965705241489e-05, - "loss": 3.1396, - "step": 44245 - }, - { - "epoch": 0.77, - "learning_rate": 7.890404517922529e-05, - "loss": 3.1897, - "step": 44250 - }, - { - "epoch": 0.77, - "learning_rate": 7.884844993131836e-05, - "loss": 2.9301, - "step": 44255 - }, - { - "epoch": 0.77, - "learning_rate": 7.879287131287463e-05, - "loss": 3.1565, - "step": 44260 - }, - { - "epoch": 0.77, - "learning_rate": 7.873730932807332e-05, - "loss": 3.163, - "step": 44265 - }, - { - "epoch": 0.77, - "learning_rate": 7.868176398109228e-05, - "loss": 3.097, - "step": 44270 - }, - { - "epoch": 0.77, - "learning_rate": 7.862623527610847e-05, - "loss": 3.1801, - "step": 44275 - }, - { - "epoch": 0.77, - "learning_rate": 7.857072321729732e-05, - "loss": 3.0713, - "step": 44280 - }, - { - "epoch": 0.77, - "learning_rate": 7.851522780883277e-05, - "loss": 2.9592, - "step": 44285 - }, - { - "epoch": 0.77, - "learning_rate": 7.845974905488812e-05, - "loss": 3.1761, - "step": 44290 - }, - { - "epoch": 0.77, - "learning_rate": 7.840428695963497e-05, - "loss": 3.1655, - "step": 44295 - }, - { - "epoch": 0.77, - "learning_rate": 7.834884152724367e-05, - "loss": 3.0895, - "step": 44300 - }, - { - "epoch": 0.77, - "learning_rate": 7.829341276188372e-05, - "loss": 3.0921, - "step": 44305 - }, - { - "epoch": 0.77, - "learning_rate": 7.823800066772273e-05, - "loss": 3.2266, - "step": 44310 - }, - { - "epoch": 0.77, - "learning_rate": 7.81826052489277e-05, - "loss": 3.2352, - "step": 44315 - }, - { - "epoch": 0.77, - "learning_rate": 7.812722650966392e-05, - "loss": 3.1349, - "step": 44320 - }, - { - "epoch": 0.77, - "learning_rate": 7.807186445409554e-05, - "loss": 3.2131, - "step": 44325 - }, - { - "epoch": 0.77, - "learning_rate": 7.801651908638578e-05, - "loss": 3.1752, - "step": 44330 - }, - { - "epoch": 0.77, - "learning_rate": 7.796119041069595e-05, - "loss": 3.2089, - "step": 44335 - }, - { - "epoch": 0.77, - "learning_rate": 7.790587843118677e-05, - "loss": 3.1466, - "step": 44340 - }, - { - "epoch": 0.77, - "learning_rate": 7.785058315201728e-05, - "loss": 3.1338, - "step": 44345 - }, - { - "epoch": 0.77, - "learning_rate": 7.77953045773454e-05, - "loss": 3.0996, - "step": 44350 - }, - { - "epoch": 0.77, - "learning_rate": 7.7740042711328e-05, - "loss": 3.1595, - "step": 44355 - }, - { - "epoch": 0.77, - "learning_rate": 7.768479755812024e-05, - "loss": 3.1891, - "step": 44360 - }, - { - "epoch": 0.77, - "learning_rate": 7.762956912187625e-05, - "loss": 3.1434, - "step": 44365 - }, - { - "epoch": 0.77, - "learning_rate": 7.757435740674913e-05, - "loss": 3.1736, - "step": 44370 - }, - { - "epoch": 0.77, - "learning_rate": 7.751916241689042e-05, - "loss": 3.1572, - "step": 44375 - }, - { - "epoch": 0.77, - "learning_rate": 7.746398415645048e-05, - "loss": 3.0956, - "step": 44380 - }, - { - "epoch": 0.77, - "learning_rate": 7.740882262957847e-05, - "loss": 3.0352, - "step": 44385 - }, - { - "epoch": 0.77, - "learning_rate": 7.735367784042214e-05, - "loss": 3.11, - "step": 44390 - }, - { - "epoch": 0.77, - "learning_rate": 7.729854979312828e-05, - "loss": 3.1855, - "step": 44395 - }, - { - "epoch": 0.77, - "learning_rate": 7.724343849184217e-05, - "loss": 3.2492, - "step": 44400 - }, - { - "epoch": 0.77, - "learning_rate": 7.718834394070786e-05, - "loss": 3.1713, - "step": 44405 - }, - { - "epoch": 0.77, - "learning_rate": 7.713326614386818e-05, - "loss": 3.157, - "step": 44410 - }, - { - "epoch": 0.77, - "learning_rate": 7.707820510546466e-05, - "loss": 3.1427, - "step": 44415 - }, - { - "epoch": 0.77, - "learning_rate": 7.702316082963773e-05, - "loss": 3.1726, - "step": 44420 - }, - { - "epoch": 0.77, - "learning_rate": 7.696813332052634e-05, - "loss": 3.2024, - "step": 44425 - }, - { - "epoch": 0.77, - "learning_rate": 7.691312258226834e-05, - "loss": 3.1871, - "step": 44430 - }, - { - "epoch": 0.77, - "learning_rate": 7.685812861900023e-05, - "loss": 3.1701, - "step": 44435 - }, - { - "epoch": 0.77, - "learning_rate": 7.680315143485724e-05, - "loss": 3.1798, - "step": 44440 - }, - { - "epoch": 0.77, - "learning_rate": 7.67481910339733e-05, - "loss": 3.1527, - "step": 44445 - }, - { - "epoch": 0.77, - "learning_rate": 7.669324742048133e-05, - "loss": 3.2117, - "step": 44450 - }, - { - "epoch": 0.77, - "learning_rate": 7.663832059851276e-05, - "loss": 3.2039, - "step": 44455 - }, - { - "epoch": 0.77, - "learning_rate": 7.658341057219775e-05, - "loss": 3.1415, - "step": 44460 - }, - { - "epoch": 0.77, - "learning_rate": 7.652851734566529e-05, - "loss": 3.1103, - "step": 44465 - }, - { - "epoch": 0.77, - "learning_rate": 7.647364092304292e-05, - "loss": 3.1017, - "step": 44470 - }, - { - "epoch": 0.77, - "learning_rate": 7.641878130845732e-05, - "loss": 3.0508, - "step": 44475 - }, - { - "epoch": 0.77, - "learning_rate": 7.636393850603352e-05, - "loss": 3.142, - "step": 44480 - }, - { - "epoch": 0.77, - "learning_rate": 7.630911251989544e-05, - "loss": 3.1147, - "step": 44485 - }, - { - "epoch": 0.77, - "learning_rate": 7.62543033541657e-05, - "loss": 3.1511, - "step": 44490 - }, - { - "epoch": 0.77, - "learning_rate": 7.619951101296557e-05, - "loss": 3.2319, - "step": 44495 - }, - { - "epoch": 0.77, - "learning_rate": 7.614473550041533e-05, - "loss": 3.0392, - "step": 44500 - }, - { - "epoch": 0.77, - "eval_loss": 3.186605215072632, - "eval_runtime": 149.8734, - "eval_samples_per_second": 12.284, - "eval_steps_per_second": 0.774, - "step": 44500 - }, - { - "epoch": 0.77, - "learning_rate": 7.608997682063378e-05, - "loss": 3.1118, - "step": 44505 - }, - { - "epoch": 0.77, - "learning_rate": 7.603523497773841e-05, - "loss": 3.1806, - "step": 44510 - }, - { - "epoch": 0.77, - "learning_rate": 7.598050997584561e-05, - "loss": 3.0252, - "step": 44515 - }, - { - "epoch": 0.77, - "learning_rate": 7.592580181907033e-05, - "loss": 3.2698, - "step": 44520 - }, - { - "epoch": 0.77, - "learning_rate": 7.587111051152633e-05, - "loss": 3.1393, - "step": 44525 - }, - { - "epoch": 0.77, - "learning_rate": 7.58164360573263e-05, - "loss": 3.1737, - "step": 44530 - }, - { - "epoch": 0.77, - "learning_rate": 7.576177846058128e-05, - "loss": 3.2284, - "step": 44535 - }, - { - "epoch": 0.77, - "learning_rate": 7.570713772540138e-05, - "loss": 3.2084, - "step": 44540 - }, - { - "epoch": 0.77, - "learning_rate": 7.56525138558952e-05, - "loss": 3.0973, - "step": 44545 - }, - { - "epoch": 0.77, - "learning_rate": 7.559790685617017e-05, - "loss": 3.1736, - "step": 44550 - }, - { - "epoch": 0.77, - "learning_rate": 7.554331673033255e-05, - "loss": 3.1482, - "step": 44555 - }, - { - "epoch": 0.77, - "learning_rate": 7.54887434824873e-05, - "loss": 3.2329, - "step": 44560 - }, - { - "epoch": 0.77, - "learning_rate": 7.543418711673774e-05, - "loss": 3.0866, - "step": 44565 - }, - { - "epoch": 0.77, - "learning_rate": 7.537964763718653e-05, - "loss": 3.21, - "step": 44570 - }, - { - "epoch": 0.77, - "learning_rate": 7.532512504793467e-05, - "loss": 3.103, - "step": 44575 - }, - { - "epoch": 0.77, - "learning_rate": 7.527061935308184e-05, - "loss": 3.1107, - "step": 44580 - }, - { - "epoch": 0.77, - "learning_rate": 7.521613055672693e-05, - "loss": 3.1379, - "step": 44585 - }, - { - "epoch": 0.77, - "learning_rate": 7.516165866296681e-05, - "loss": 3.1808, - "step": 44590 - }, - { - "epoch": 0.77, - "learning_rate": 7.510720367589777e-05, - "loss": 3.1765, - "step": 44595 - }, - { - "epoch": 0.77, - "learning_rate": 7.505276559961444e-05, - "loss": 3.2069, - "step": 44600 - }, - { - "epoch": 0.77, - "learning_rate": 7.499834443821023e-05, - "loss": 3.1409, - "step": 44605 - }, - { - "epoch": 0.77, - "learning_rate": 7.494394019577759e-05, - "loss": 3.1709, - "step": 44610 - }, - { - "epoch": 0.77, - "learning_rate": 7.488955287640706e-05, - "loss": 3.1446, - "step": 44615 - }, - { - "epoch": 0.77, - "learning_rate": 7.483518248418858e-05, - "loss": 3.0126, - "step": 44620 - }, - { - "epoch": 0.77, - "learning_rate": 7.47808290232104e-05, - "loss": 3.1909, - "step": 44625 - }, - { - "epoch": 0.77, - "learning_rate": 7.47264924975596e-05, - "loss": 3.1479, - "step": 44630 - }, - { - "epoch": 0.77, - "learning_rate": 7.46721729113222e-05, - "loss": 3.084, - "step": 44635 - }, - { - "epoch": 0.77, - "learning_rate": 7.461787026858255e-05, - "loss": 3.1349, - "step": 44640 - }, - { - "epoch": 0.77, - "learning_rate": 7.456358457342386e-05, - "loss": 3.1268, - "step": 44645 - }, - { - "epoch": 0.77, - "learning_rate": 7.450931582992839e-05, - "loss": 3.1649, - "step": 44650 - }, - { - "epoch": 0.77, - "learning_rate": 7.445506404217674e-05, - "loss": 3.1787, - "step": 44655 - }, - { - "epoch": 0.77, - "learning_rate": 7.440082921424836e-05, - "loss": 3.0321, - "step": 44660 - }, - { - "epoch": 0.77, - "learning_rate": 7.434661135022149e-05, - "loss": 3.1882, - "step": 44665 - }, - { - "epoch": 0.77, - "learning_rate": 7.429241045417287e-05, - "loss": 3.1999, - "step": 44670 - }, - { - "epoch": 0.77, - "learning_rate": 7.423822653017831e-05, - "loss": 3.1162, - "step": 44675 - }, - { - "epoch": 0.77, - "learning_rate": 7.418405958231217e-05, - "loss": 3.1264, - "step": 44680 - }, - { - "epoch": 0.77, - "learning_rate": 7.412990961464742e-05, - "loss": 3.2139, - "step": 44685 - }, - { - "epoch": 0.77, - "learning_rate": 7.40757766312559e-05, - "loss": 3.0781, - "step": 44690 - }, - { - "epoch": 0.77, - "learning_rate": 7.402166063620805e-05, - "loss": 3.1487, - "step": 44695 - }, - { - "epoch": 0.77, - "learning_rate": 7.39675616335733e-05, - "loss": 3.0855, - "step": 44700 - }, - { - "epoch": 0.77, - "learning_rate": 7.391347962741948e-05, - "loss": 3.1442, - "step": 44705 - }, - { - "epoch": 0.77, - "learning_rate": 7.385941462181327e-05, - "loss": 3.2047, - "step": 44710 - }, - { - "epoch": 0.77, - "learning_rate": 7.38053666208203e-05, - "loss": 3.184, - "step": 44715 - }, - { - "epoch": 0.77, - "learning_rate": 7.375133562850443e-05, - "loss": 3.2336, - "step": 44720 - }, - { - "epoch": 0.77, - "learning_rate": 7.369732164892854e-05, - "loss": 3.1874, - "step": 44725 - }, - { - "epoch": 0.77, - "learning_rate": 7.364332468615436e-05, - "loss": 3.2044, - "step": 44730 - }, - { - "epoch": 0.77, - "learning_rate": 7.35893447442421e-05, - "loss": 3.209, - "step": 44735 - }, - { - "epoch": 0.77, - "learning_rate": 7.353538182725077e-05, - "loss": 3.1973, - "step": 44740 - }, - { - "epoch": 0.77, - "learning_rate": 7.348143593923814e-05, - "loss": 3.2708, - "step": 44745 - }, - { - "epoch": 0.77, - "learning_rate": 7.342750708426053e-05, - "loss": 3.1732, - "step": 44750 - }, - { - "epoch": 0.77, - "learning_rate": 7.337359526637332e-05, - "loss": 3.1316, - "step": 44755 - }, - { - "epoch": 0.77, - "learning_rate": 7.331970048963027e-05, - "loss": 3.0108, - "step": 44760 - }, - { - "epoch": 0.77, - "learning_rate": 7.326582275808409e-05, - "loss": 3.1858, - "step": 44765 - }, - { - "epoch": 0.77, - "learning_rate": 7.3211962075786e-05, - "loss": 3.1266, - "step": 44770 - }, - { - "epoch": 0.77, - "learning_rate": 7.3158118446786e-05, - "loss": 3.2553, - "step": 44775 - }, - { - "epoch": 0.77, - "learning_rate": 7.310429187513305e-05, - "loss": 3.0143, - "step": 44780 - }, - { - "epoch": 0.78, - "learning_rate": 7.305048236487452e-05, - "loss": 3.2031, - "step": 44785 - }, - { - "epoch": 0.78, - "learning_rate": 7.299668992005664e-05, - "loss": 3.1179, - "step": 44790 - }, - { - "epoch": 0.78, - "learning_rate": 7.294291454472429e-05, - "loss": 3.0976, - "step": 44795 - }, - { - "epoch": 0.78, - "learning_rate": 7.28891562429211e-05, - "loss": 3.1156, - "step": 44800 - }, - { - "epoch": 0.78, - "learning_rate": 7.283541501868935e-05, - "loss": 3.2027, - "step": 44805 - }, - { - "epoch": 0.78, - "learning_rate": 7.27816908760703e-05, - "loss": 3.1327, - "step": 44810 - }, - { - "epoch": 0.78, - "learning_rate": 7.272798381910361e-05, - "loss": 3.0802, - "step": 44815 - }, - { - "epoch": 0.78, - "learning_rate": 7.267429385182777e-05, - "loss": 3.1307, - "step": 44820 - }, - { - "epoch": 0.78, - "learning_rate": 7.262062097828005e-05, - "loss": 3.065, - "step": 44825 - }, - { - "epoch": 0.78, - "learning_rate": 7.256696520249623e-05, - "loss": 3.0924, - "step": 44830 - }, - { - "epoch": 0.78, - "learning_rate": 7.251332652851111e-05, - "loss": 3.1293, - "step": 44835 - }, - { - "epoch": 0.78, - "learning_rate": 7.245970496035803e-05, - "loss": 3.1039, - "step": 44840 - }, - { - "epoch": 0.78, - "learning_rate": 7.240610050206896e-05, - "loss": 3.1689, - "step": 44845 - }, - { - "epoch": 0.78, - "learning_rate": 7.235251315767475e-05, - "loss": 3.2031, - "step": 44850 - }, - { - "epoch": 0.78, - "learning_rate": 7.22989429312048e-05, - "loss": 3.0999, - "step": 44855 - }, - { - "epoch": 0.78, - "learning_rate": 7.224538982668746e-05, - "loss": 3.1289, - "step": 44860 - }, - { - "epoch": 0.78, - "learning_rate": 7.21918538481497e-05, - "loss": 3.145, - "step": 44865 - }, - { - "epoch": 0.78, - "learning_rate": 7.213833499961684e-05, - "loss": 3.2122, - "step": 44870 - }, - { - "epoch": 0.78, - "learning_rate": 7.208483328511349e-05, - "loss": 3.1842, - "step": 44875 - }, - { - "epoch": 0.78, - "learning_rate": 7.203134870866264e-05, - "loss": 3.1976, - "step": 44880 - }, - { - "epoch": 0.78, - "learning_rate": 7.197788127428595e-05, - "loss": 3.2405, - "step": 44885 - }, - { - "epoch": 0.78, - "learning_rate": 7.192443098600421e-05, - "loss": 3.0957, - "step": 44890 - }, - { - "epoch": 0.78, - "learning_rate": 7.187099784783617e-05, - "loss": 3.1705, - "step": 44895 - }, - { - "epoch": 0.78, - "learning_rate": 7.181758186380007e-05, - "loss": 2.9637, - "step": 44900 - }, - { - "epoch": 0.78, - "learning_rate": 7.176418303791237e-05, - "loss": 3.2228, - "step": 44905 - }, - { - "epoch": 0.78, - "learning_rate": 7.171080137418832e-05, - "loss": 3.1422, - "step": 44910 - }, - { - "epoch": 0.78, - "learning_rate": 7.165743687664224e-05, - "loss": 3.225, - "step": 44915 - }, - { - "epoch": 0.78, - "learning_rate": 7.160408954928651e-05, - "loss": 3.1538, - "step": 44920 - }, - { - "epoch": 0.78, - "learning_rate": 7.155075939613279e-05, - "loss": 3.0714, - "step": 44925 - }, - { - "epoch": 0.78, - "learning_rate": 7.14974464211912e-05, - "loss": 3.1305, - "step": 44930 - }, - { - "epoch": 0.78, - "learning_rate": 7.144415062847047e-05, - "loss": 3.1041, - "step": 44935 - }, - { - "epoch": 0.78, - "learning_rate": 7.139087202197842e-05, - "loss": 3.122, - "step": 44940 - }, - { - "epoch": 0.78, - "learning_rate": 7.133761060572123e-05, - "loss": 3.1977, - "step": 44945 - }, - { - "epoch": 0.78, - "learning_rate": 7.128436638370367e-05, - "loss": 3.0151, - "step": 44950 - }, - { - "epoch": 0.78, - "learning_rate": 7.123113935992975e-05, - "loss": 3.1652, - "step": 44955 - }, - { - "epoch": 0.78, - "learning_rate": 7.117792953840168e-05, - "loss": 3.2035, - "step": 44960 - }, - { - "epoch": 0.78, - "learning_rate": 7.112473692312054e-05, - "loss": 3.1325, - "step": 44965 - }, - { - "epoch": 0.78, - "learning_rate": 7.10715615180864e-05, - "loss": 3.0583, - "step": 44970 - }, - { - "epoch": 0.78, - "learning_rate": 7.101840332729739e-05, - "loss": 3.0872, - "step": 44975 - }, - { - "epoch": 0.78, - "learning_rate": 7.096526235475102e-05, - "loss": 3.2089, - "step": 44980 - }, - { - "epoch": 0.78, - "learning_rate": 7.091213860444318e-05, - "loss": 2.9796, - "step": 44985 - }, - { - "epoch": 0.78, - "learning_rate": 7.085903208036837e-05, - "loss": 3.0638, - "step": 44990 - }, - { - "epoch": 0.78, - "learning_rate": 7.080594278652017e-05, - "loss": 3.1517, - "step": 44995 - }, - { - "epoch": 0.78, - "learning_rate": 7.07528707268903e-05, - "loss": 3.1721, - "step": 45000 - }, - { - "epoch": 0.78, - "eval_loss": 3.183523654937744, - "eval_runtime": 150.1695, - "eval_samples_per_second": 12.259, - "eval_steps_per_second": 0.772, - "step": 45000 - }, - { - "epoch": 0.78, - "learning_rate": 7.069981590546978e-05, - "loss": 3.0841, - "step": 45005 - }, - { - "epoch": 0.78, - "learning_rate": 7.064677832624794e-05, - "loss": 3.2613, - "step": 45010 - }, - { - "epoch": 0.78, - "learning_rate": 7.059375799321287e-05, - "loss": 3.0345, - "step": 45015 - }, - { - "epoch": 0.78, - "learning_rate": 7.054075491035168e-05, - "loss": 3.196, - "step": 45020 - }, - { - "epoch": 0.78, - "learning_rate": 7.04877690816497e-05, - "loss": 3.1549, - "step": 45025 - }, - { - "epoch": 0.78, - "learning_rate": 7.043480051109114e-05, - "loss": 3.0979, - "step": 45030 - }, - { - "epoch": 0.78, - "learning_rate": 7.038184920265916e-05, - "loss": 3.1891, - "step": 45035 - }, - { - "epoch": 0.78, - "learning_rate": 7.032891516033538e-05, - "loss": 3.119, - "step": 45040 - }, - { - "epoch": 0.78, - "learning_rate": 7.027599838810011e-05, - "loss": 3.169, - "step": 45045 - }, - { - "epoch": 0.78, - "learning_rate": 7.022309888993245e-05, - "loss": 3.1685, - "step": 45050 - }, - { - "epoch": 0.78, - "learning_rate": 7.017021666981009e-05, - "loss": 3.2098, - "step": 45055 - }, - { - "epoch": 0.78, - "learning_rate": 7.011735173170964e-05, - "loss": 3.1718, - "step": 45060 - }, - { - "epoch": 0.78, - "learning_rate": 7.006450407960622e-05, - "loss": 3.1111, - "step": 45065 - }, - { - "epoch": 0.78, - "learning_rate": 7.001167371747369e-05, - "loss": 3.1789, - "step": 45070 - }, - { - "epoch": 0.78, - "learning_rate": 6.995886064928464e-05, - "loss": 3.0517, - "step": 45075 - }, - { - "epoch": 0.78, - "learning_rate": 6.990606487901033e-05, - "loss": 3.1516, - "step": 45080 - }, - { - "epoch": 0.78, - "learning_rate": 6.985328641062064e-05, - "loss": 3.1516, - "step": 45085 - }, - { - "epoch": 0.78, - "learning_rate": 6.98005252480844e-05, - "loss": 3.2177, - "step": 45090 - }, - { - "epoch": 0.78, - "learning_rate": 6.97477813953689e-05, - "loss": 3.0959, - "step": 45095 - }, - { - "epoch": 0.78, - "learning_rate": 6.969505485644023e-05, - "loss": 3.1638, - "step": 45100 - }, - { - "epoch": 0.78, - "learning_rate": 6.964234563526314e-05, - "loss": 3.2965, - "step": 45105 - }, - { - "epoch": 0.78, - "learning_rate": 6.958965373580101e-05, - "loss": 3.2153, - "step": 45110 - }, - { - "epoch": 0.78, - "learning_rate": 6.953697916201618e-05, - "loss": 3.1298, - "step": 45115 - }, - { - "epoch": 0.78, - "learning_rate": 6.948432191786936e-05, - "loss": 3.0617, - "step": 45120 - }, - { - "epoch": 0.78, - "learning_rate": 6.94316820073202e-05, - "loss": 3.1779, - "step": 45125 - }, - { - "epoch": 0.78, - "learning_rate": 6.93790594343269e-05, - "loss": 3.1907, - "step": 45130 - }, - { - "epoch": 0.78, - "learning_rate": 6.932645420284636e-05, - "loss": 3.1058, - "step": 45135 - }, - { - "epoch": 0.78, - "learning_rate": 6.927386631683435e-05, - "loss": 3.0789, - "step": 45140 - }, - { - "epoch": 0.78, - "learning_rate": 6.92212957802452e-05, - "loss": 3.0602, - "step": 45145 - }, - { - "epoch": 0.78, - "learning_rate": 6.916874259703172e-05, - "loss": 3.197, - "step": 45150 - }, - { - "epoch": 0.78, - "learning_rate": 6.911620677114592e-05, - "loss": 3.1129, - "step": 45155 - }, - { - "epoch": 0.78, - "learning_rate": 6.90636883065381e-05, - "loss": 3.2127, - "step": 45160 - }, - { - "epoch": 0.78, - "learning_rate": 6.901118720715731e-05, - "loss": 3.1139, - "step": 45165 - }, - { - "epoch": 0.78, - "learning_rate": 6.895870347695151e-05, - "loss": 3.2134, - "step": 45170 - }, - { - "epoch": 0.78, - "learning_rate": 6.890623711986715e-05, - "loss": 3.1768, - "step": 45175 - }, - { - "epoch": 0.78, - "learning_rate": 6.88537881398494e-05, - "loss": 3.1486, - "step": 45180 - }, - { - "epoch": 0.78, - "learning_rate": 6.88013565408422e-05, - "loss": 3.2026, - "step": 45185 - }, - { - "epoch": 0.78, - "learning_rate": 6.874894232678804e-05, - "loss": 3.1052, - "step": 45190 - }, - { - "epoch": 0.78, - "learning_rate": 6.869654550162833e-05, - "loss": 3.1453, - "step": 45195 - }, - { - "epoch": 0.78, - "learning_rate": 6.864416606930299e-05, - "loss": 3.1664, - "step": 45200 - }, - { - "epoch": 0.78, - "learning_rate": 6.859180403375072e-05, - "loss": 3.1615, - "step": 45205 - }, - { - "epoch": 0.78, - "learning_rate": 6.853945939890878e-05, - "loss": 3.0932, - "step": 45210 - }, - { - "epoch": 0.78, - "learning_rate": 6.848713216871323e-05, - "loss": 3.1201, - "step": 45215 - }, - { - "epoch": 0.78, - "learning_rate": 6.843482234709893e-05, - "loss": 3.0702, - "step": 45220 - }, - { - "epoch": 0.78, - "learning_rate": 6.838252993799932e-05, - "loss": 3.0548, - "step": 45225 - }, - { - "epoch": 0.78, - "learning_rate": 6.833025494534629e-05, - "loss": 3.1788, - "step": 45230 - }, - { - "epoch": 0.78, - "learning_rate": 6.827799737307088e-05, - "loss": 3.2091, - "step": 45235 - }, - { - "epoch": 0.78, - "learning_rate": 6.822575722510249e-05, - "loss": 3.29, - "step": 45240 - }, - { - "epoch": 0.78, - "learning_rate": 6.817353450536926e-05, - "loss": 3.0955, - "step": 45245 - }, - { - "epoch": 0.78, - "learning_rate": 6.812132921779832e-05, - "loss": 3.219, - "step": 45250 - }, - { - "epoch": 0.78, - "learning_rate": 6.806914136631487e-05, - "loss": 3.2833, - "step": 45255 - }, - { - "epoch": 0.78, - "learning_rate": 6.801697095484346e-05, - "loss": 3.1134, - "step": 45260 - }, - { - "epoch": 0.78, - "learning_rate": 6.796481798730692e-05, - "loss": 3.1767, - "step": 45265 - }, - { - "epoch": 0.78, - "learning_rate": 6.791268246762687e-05, - "loss": 3.1696, - "step": 45270 - }, - { - "epoch": 0.78, - "learning_rate": 6.78605643997238e-05, - "loss": 3.1449, - "step": 45275 - }, - { - "epoch": 0.78, - "learning_rate": 6.780846378751643e-05, - "loss": 3.1968, - "step": 45280 - }, - { - "epoch": 0.78, - "learning_rate": 6.775638063492272e-05, - "loss": 3.0654, - "step": 45285 - }, - { - "epoch": 0.78, - "learning_rate": 6.770431494585892e-05, - "loss": 3.1028, - "step": 45290 - }, - { - "epoch": 0.78, - "learning_rate": 6.765226672424007e-05, - "loss": 3.1161, - "step": 45295 - }, - { - "epoch": 0.78, - "learning_rate": 6.76002359739802e-05, - "loss": 3.2315, - "step": 45300 - }, - { - "epoch": 0.78, - "learning_rate": 6.754822269899145e-05, - "loss": 3.0886, - "step": 45305 - }, - { - "epoch": 0.78, - "learning_rate": 6.749622690318497e-05, - "loss": 3.1833, - "step": 45310 - }, - { - "epoch": 0.78, - "learning_rate": 6.744424859047076e-05, - "loss": 3.2618, - "step": 45315 - }, - { - "epoch": 0.78, - "learning_rate": 6.739228776475721e-05, - "loss": 3.1447, - "step": 45320 - }, - { - "epoch": 0.78, - "learning_rate": 6.734034442995154e-05, - "loss": 3.0796, - "step": 45325 - }, - { - "epoch": 0.78, - "learning_rate": 6.72884185899596e-05, - "loss": 3.0618, - "step": 45330 - }, - { - "epoch": 0.78, - "learning_rate": 6.723651024868586e-05, - "loss": 3.1823, - "step": 45335 - }, - { - "epoch": 0.78, - "learning_rate": 6.718461941003375e-05, - "loss": 3.0862, - "step": 45340 - }, - { - "epoch": 0.78, - "learning_rate": 6.713274607790507e-05, - "loss": 3.2958, - "step": 45345 - }, - { - "epoch": 0.78, - "learning_rate": 6.708089025620049e-05, - "loss": 3.1402, - "step": 45350 - }, - { - "epoch": 0.78, - "learning_rate": 6.702905194881928e-05, - "loss": 3.3028, - "step": 45355 - }, - { - "epoch": 0.79, - "learning_rate": 6.69772311596593e-05, - "loss": 3.1589, - "step": 45360 - }, - { - "epoch": 0.79, - "learning_rate": 6.692542789261742e-05, - "loss": 3.0914, - "step": 45365 - }, - { - "epoch": 0.79, - "learning_rate": 6.687364215158884e-05, - "loss": 3.1536, - "step": 45370 - }, - { - "epoch": 0.79, - "learning_rate": 6.682187394046763e-05, - "loss": 3.1351, - "step": 45375 - }, - { - "epoch": 0.79, - "learning_rate": 6.677012326314649e-05, - "loss": 3.1095, - "step": 45380 - }, - { - "epoch": 0.79, - "learning_rate": 6.671839012351682e-05, - "loss": 3.1706, - "step": 45385 - }, - { - "epoch": 0.79, - "learning_rate": 6.666667452546854e-05, - "loss": 3.1585, - "step": 45390 - }, - { - "epoch": 0.79, - "learning_rate": 6.661497647289062e-05, - "loss": 3.1016, - "step": 45395 - }, - { - "epoch": 0.79, - "learning_rate": 6.656329596967038e-05, - "loss": 3.1136, - "step": 45400 - }, - { - "epoch": 0.79, - "learning_rate": 6.651163301969397e-05, - "loss": 3.1383, - "step": 45405 - }, - { - "epoch": 0.79, - "learning_rate": 6.645998762684613e-05, - "loss": 3.1012, - "step": 45410 - }, - { - "epoch": 0.79, - "learning_rate": 6.640835979501025e-05, - "loss": 3.1399, - "step": 45415 - }, - { - "epoch": 0.79, - "learning_rate": 6.635674952806867e-05, - "loss": 3.158, - "step": 45420 - }, - { - "epoch": 0.79, - "learning_rate": 6.630515682990212e-05, - "loss": 3.2877, - "step": 45425 - }, - { - "epoch": 0.79, - "learning_rate": 6.62535817043901e-05, - "loss": 3.0882, - "step": 45430 - }, - { - "epoch": 0.79, - "learning_rate": 6.620202415541081e-05, - "loss": 3.2474, - "step": 45435 - }, - { - "epoch": 0.79, - "learning_rate": 6.615048418684106e-05, - "loss": 3.1571, - "step": 45440 - }, - { - "epoch": 0.79, - "learning_rate": 6.609896180255649e-05, - "loss": 3.1484, - "step": 45445 - }, - { - "epoch": 0.79, - "learning_rate": 6.604745700643126e-05, - "loss": 3.1594, - "step": 45450 - }, - { - "epoch": 0.79, - "learning_rate": 6.599596980233828e-05, - "loss": 3.2426, - "step": 45455 - }, - { - "epoch": 0.79, - "learning_rate": 6.594450019414913e-05, - "loss": 3.169, - "step": 45460 - }, - { - "epoch": 0.79, - "learning_rate": 6.589304818573405e-05, - "loss": 3.1357, - "step": 45465 - }, - { - "epoch": 0.79, - "learning_rate": 6.584161378096187e-05, - "loss": 3.0433, - "step": 45470 - }, - { - "epoch": 0.79, - "learning_rate": 6.57901969837004e-05, - "loss": 3.1571, - "step": 45475 - }, - { - "epoch": 0.79, - "learning_rate": 6.573879779781578e-05, - "loss": 3.1311, - "step": 45480 - }, - { - "epoch": 0.79, - "learning_rate": 6.568741622717303e-05, - "loss": 3.191, - "step": 45485 - }, - { - "epoch": 0.79, - "learning_rate": 6.563605227563572e-05, - "loss": 3.1198, - "step": 45490 - }, - { - "epoch": 0.79, - "learning_rate": 6.558470594706609e-05, - "loss": 3.1676, - "step": 45495 - }, - { - "epoch": 0.79, - "learning_rate": 6.553337724532533e-05, - "loss": 3.0357, - "step": 45500 - }, - { - "epoch": 0.79, - "eval_loss": 3.180738925933838, - "eval_runtime": 151.371, - "eval_samples_per_second": 12.162, - "eval_steps_per_second": 0.766, - "step": 45500 - }, - { - "epoch": 0.79, - "learning_rate": 6.548206617427295e-05, - "loss": 3.1704, - "step": 45505 - }, - { - "epoch": 0.79, - "learning_rate": 6.543077273776732e-05, - "loss": 3.1313, - "step": 45510 - }, - { - "epoch": 0.79, - "learning_rate": 6.537949693966543e-05, - "loss": 3.0594, - "step": 45515 - }, - { - "epoch": 0.79, - "learning_rate": 6.53282387838229e-05, - "loss": 3.0647, - "step": 45520 - }, - { - "epoch": 0.79, - "learning_rate": 6.52769982740942e-05, - "loss": 3.1225, - "step": 45525 - }, - { - "epoch": 0.79, - "learning_rate": 6.522577541433235e-05, - "loss": 3.1439, - "step": 45530 - }, - { - "epoch": 0.79, - "learning_rate": 6.517457020838887e-05, - "loss": 3.1825, - "step": 45535 - }, - { - "epoch": 0.79, - "learning_rate": 6.51233826601143e-05, - "loss": 3.2263, - "step": 45540 - }, - { - "epoch": 0.79, - "learning_rate": 6.507221277335767e-05, - "loss": 3.167, - "step": 45545 - }, - { - "epoch": 0.79, - "learning_rate": 6.502106055196654e-05, - "loss": 3.0891, - "step": 45550 - }, - { - "epoch": 0.79, - "learning_rate": 6.496992599978757e-05, - "loss": 3.0816, - "step": 45555 - }, - { - "epoch": 0.79, - "learning_rate": 6.491880912066549e-05, - "loss": 3.1269, - "step": 45560 - }, - { - "epoch": 0.79, - "learning_rate": 6.486770991844429e-05, - "loss": 3.1808, - "step": 45565 - }, - { - "epoch": 0.79, - "learning_rate": 6.481662839696626e-05, - "loss": 3.2526, - "step": 45570 - }, - { - "epoch": 0.79, - "learning_rate": 6.476556456007242e-05, - "loss": 3.0343, - "step": 45575 - }, - { - "epoch": 0.79, - "learning_rate": 6.47145184116027e-05, - "loss": 3.268, - "step": 45580 - }, - { - "epoch": 0.79, - "learning_rate": 6.466348995539531e-05, - "loss": 3.0679, - "step": 45585 - }, - { - "epoch": 0.79, - "learning_rate": 6.46124791952873e-05, - "loss": 3.1898, - "step": 45590 - }, - { - "epoch": 0.79, - "learning_rate": 6.456148613511462e-05, - "loss": 3.0314, - "step": 45595 - }, - { - "epoch": 0.79, - "learning_rate": 6.451051077871154e-05, - "loss": 3.1434, - "step": 45600 - }, - { - "epoch": 0.79, - "learning_rate": 6.445955312991122e-05, - "loss": 3.2544, - "step": 45605 - }, - { - "epoch": 0.79, - "learning_rate": 6.440861319254536e-05, - "loss": 3.0762, - "step": 45610 - }, - { - "epoch": 0.79, - "learning_rate": 6.435769097044431e-05, - "loss": 2.9781, - "step": 45615 - }, - { - "epoch": 0.79, - "learning_rate": 6.430678646743735e-05, - "loss": 3.1502, - "step": 45620 - }, - { - "epoch": 0.79, - "learning_rate": 6.425589968735217e-05, - "loss": 3.0586, - "step": 45625 - }, - { - "epoch": 0.79, - "learning_rate": 6.420503063401511e-05, - "loss": 3.0531, - "step": 45630 - }, - { - "epoch": 0.79, - "learning_rate": 6.415417931125134e-05, - "loss": 3.0447, - "step": 45635 - }, - { - "epoch": 0.79, - "learning_rate": 6.41033457228845e-05, - "loss": 3.1198, - "step": 45640 - }, - { - "epoch": 0.79, - "learning_rate": 6.405252987273723e-05, - "loss": 3.1565, - "step": 45645 - }, - { - "epoch": 0.79, - "learning_rate": 6.400173176463047e-05, - "loss": 3.1355, - "step": 45650 - }, - { - "epoch": 0.79, - "learning_rate": 6.395095140238393e-05, - "loss": 3.0957, - "step": 45655 - }, - { - "epoch": 0.79, - "learning_rate": 6.390018878981627e-05, - "loss": 3.0195, - "step": 45660 - }, - { - "epoch": 0.79, - "learning_rate": 6.384944393074434e-05, - "loss": 3.1708, - "step": 45665 - }, - { - "epoch": 0.79, - "learning_rate": 6.379871682898392e-05, - "loss": 3.0612, - "step": 45670 - }, - { - "epoch": 0.79, - "learning_rate": 6.374800748834953e-05, - "loss": 3.1129, - "step": 45675 - }, - { - "epoch": 0.79, - "learning_rate": 6.369731591265423e-05, - "loss": 3.1009, - "step": 45680 - }, - { - "epoch": 0.79, - "learning_rate": 6.364664210570973e-05, - "loss": 3.0948, - "step": 45685 - }, - { - "epoch": 0.79, - "learning_rate": 6.359598607132644e-05, - "loss": 3.0709, - "step": 45690 - }, - { - "epoch": 0.79, - "learning_rate": 6.35453478133134e-05, - "loss": 3.043, - "step": 45695 - }, - { - "epoch": 0.79, - "learning_rate": 6.349472733547844e-05, - "loss": 3.1692, - "step": 45700 - }, - { - "epoch": 0.79, - "learning_rate": 6.344412464162795e-05, - "loss": 3.247, - "step": 45705 - }, - { - "epoch": 0.79, - "learning_rate": 6.33935397355669e-05, - "loss": 3.1907, - "step": 45710 - }, - { - "epoch": 0.79, - "learning_rate": 6.334297262109912e-05, - "loss": 3.3283, - "step": 45715 - }, - { - "epoch": 0.79, - "learning_rate": 6.329242330202686e-05, - "loss": 3.2082, - "step": 45720 - }, - { - "epoch": 0.79, - "learning_rate": 6.324189178215133e-05, - "loss": 3.1577, - "step": 45725 - }, - { - "epoch": 0.79, - "learning_rate": 6.319137806527219e-05, - "loss": 3.096, - "step": 45730 - }, - { - "epoch": 0.79, - "learning_rate": 6.314088215518776e-05, - "loss": 3.1203, - "step": 45735 - }, - { - "epoch": 0.79, - "learning_rate": 6.309040405569515e-05, - "loss": 3.082, - "step": 45740 - }, - { - "epoch": 0.79, - "learning_rate": 6.303994377058999e-05, - "loss": 3.1651, - "step": 45745 - }, - { - "epoch": 0.79, - "learning_rate": 6.298950130366659e-05, - "loss": 3.2569, - "step": 45750 - }, - { - "epoch": 0.79, - "learning_rate": 6.293907665871808e-05, - "loss": 3.0798, - "step": 45755 - }, - { - "epoch": 0.79, - "learning_rate": 6.288866983953612e-05, - "loss": 3.2079, - "step": 45760 - }, - { - "epoch": 0.79, - "learning_rate": 6.283828084991102e-05, - "loss": 3.2477, - "step": 45765 - }, - { - "epoch": 0.79, - "learning_rate": 6.278790969363175e-05, - "loss": 3.2101, - "step": 45770 - }, - { - "epoch": 0.79, - "learning_rate": 6.27375563744859e-05, - "loss": 3.2387, - "step": 45775 - }, - { - "epoch": 0.79, - "learning_rate": 6.268722089625992e-05, - "loss": 3.1574, - "step": 45780 - }, - { - "epoch": 0.79, - "learning_rate": 6.263690326273874e-05, - "loss": 3.1126, - "step": 45785 - }, - { - "epoch": 0.79, - "learning_rate": 6.258660347770597e-05, - "loss": 3.2076, - "step": 45790 - }, - { - "epoch": 0.79, - "learning_rate": 6.253632154494388e-05, - "loss": 3.1809, - "step": 45795 - }, - { - "epoch": 0.79, - "learning_rate": 6.248605746823339e-05, - "loss": 3.131, - "step": 45800 - }, - { - "epoch": 0.79, - "learning_rate": 6.243581125135417e-05, - "loss": 3.1605, - "step": 45805 - }, - { - "epoch": 0.79, - "learning_rate": 6.238558289808456e-05, - "loss": 3.1935, - "step": 45810 - }, - { - "epoch": 0.79, - "learning_rate": 6.233537241220117e-05, - "loss": 3.1822, - "step": 45815 - }, - { - "epoch": 0.79, - "learning_rate": 6.228517979747987e-05, - "loss": 3.1973, - "step": 45820 - }, - { - "epoch": 0.79, - "learning_rate": 6.223500505769476e-05, - "loss": 3.0712, - "step": 45825 - }, - { - "epoch": 0.79, - "learning_rate": 6.218484819661864e-05, - "loss": 3.2074, - "step": 45830 - }, - { - "epoch": 0.79, - "learning_rate": 6.213470921802335e-05, - "loss": 3.1901, - "step": 45835 - }, - { - "epoch": 0.79, - "learning_rate": 6.208458812567873e-05, - "loss": 3.1254, - "step": 45840 - }, - { - "epoch": 0.79, - "learning_rate": 6.203448492335385e-05, - "loss": 3.2218, - "step": 45845 - }, - { - "epoch": 0.79, - "learning_rate": 6.198439961481615e-05, - "loss": 3.194, - "step": 45850 - }, - { - "epoch": 0.79, - "learning_rate": 6.193433220383174e-05, - "loss": 3.1396, - "step": 45855 - }, - { - "epoch": 0.79, - "learning_rate": 6.188428269416561e-05, - "loss": 3.2048, - "step": 45860 - }, - { - "epoch": 0.79, - "learning_rate": 6.183425108958094e-05, - "loss": 3.085, - "step": 45865 - }, - { - "epoch": 0.79, - "learning_rate": 6.178423739384006e-05, - "loss": 3.1692, - "step": 45870 - }, - { - "epoch": 0.79, - "learning_rate": 6.173424161070372e-05, - "loss": 3.1756, - "step": 45875 - }, - { - "epoch": 0.79, - "learning_rate": 6.168426374393125e-05, - "loss": 3.1431, - "step": 45880 - }, - { - "epoch": 0.79, - "learning_rate": 6.163430379728086e-05, - "loss": 3.2415, - "step": 45885 - }, - { - "epoch": 0.79, - "learning_rate": 6.158436177450931e-05, - "loss": 3.27, - "step": 45890 - }, - { - "epoch": 0.79, - "learning_rate": 6.153443767937175e-05, - "loss": 3.0112, - "step": 45895 - }, - { - "epoch": 0.79, - "learning_rate": 6.148453151562243e-05, - "loss": 3.1826, - "step": 45900 - }, - { - "epoch": 0.79, - "learning_rate": 6.143464328701394e-05, - "loss": 3.2231, - "step": 45905 - }, - { - "epoch": 0.79, - "learning_rate": 6.13847729972976e-05, - "loss": 3.1354, - "step": 45910 - }, - { - "epoch": 0.79, - "learning_rate": 6.133492065022358e-05, - "loss": 3.0341, - "step": 45915 - }, - { - "epoch": 0.79, - "learning_rate": 6.128508624954025e-05, - "loss": 3.1803, - "step": 45920 - }, - { - "epoch": 0.79, - "learning_rate": 6.12352697989951e-05, - "loss": 3.0783, - "step": 45925 - }, - { - "epoch": 0.79, - "learning_rate": 6.118547130233401e-05, - "loss": 3.1693, - "step": 45930 - }, - { - "epoch": 0.79, - "learning_rate": 6.113569076330152e-05, - "loss": 3.1872, - "step": 45935 - }, - { - "epoch": 0.8, - "learning_rate": 6.108592818564105e-05, - "loss": 3.1872, - "step": 45940 - }, - { - "epoch": 0.8, - "learning_rate": 6.103618357309423e-05, - "loss": 3.1362, - "step": 45945 - }, - { - "epoch": 0.8, - "learning_rate": 6.0986456929401795e-05, - "loss": 3.0724, - "step": 45950 - }, - { - "epoch": 0.8, - "learning_rate": 6.09367482583029e-05, - "loss": 3.2822, - "step": 45955 - }, - { - "epoch": 0.8, - "learning_rate": 6.0887057563535266e-05, - "loss": 3.1595, - "step": 45960 - }, - { - "epoch": 0.8, - "learning_rate": 6.0837384848835634e-05, - "loss": 3.0659, - "step": 45965 - }, - { - "epoch": 0.8, - "learning_rate": 6.0787730117938885e-05, - "loss": 3.192, - "step": 45970 - }, - { - "epoch": 0.8, - "learning_rate": 6.073809337457879e-05, - "loss": 3.1099, - "step": 45975 - }, - { - "epoch": 0.8, - "learning_rate": 6.068847462248797e-05, - "loss": 3.1255, - "step": 45980 - }, - { - "epoch": 0.8, - "learning_rate": 6.063887386539741e-05, - "loss": 3.2569, - "step": 45985 - }, - { - "epoch": 0.8, - "learning_rate": 6.058929110703685e-05, - "loss": 3.1493, - "step": 45990 - }, - { - "epoch": 0.8, - "learning_rate": 6.0539726351134585e-05, - "loss": 3.1143, - "step": 45995 - }, - { - "epoch": 0.8, - "learning_rate": 6.049017960141767e-05, - "loss": 3.1716, - "step": 46000 - }, - { - "epoch": 0.8, - "eval_loss": 3.1786859035491943, - "eval_runtime": 150.5699, - "eval_samples_per_second": 12.227, - "eval_steps_per_second": 0.77, - "step": 46000 - }, - { - "epoch": 0.8, - "learning_rate": 6.04406508616118e-05, - "loss": 3.0864, - "step": 46005 - }, - { - "epoch": 0.8, - "learning_rate": 6.0391140135441295e-05, - "loss": 3.2294, - "step": 46010 - }, - { - "epoch": 0.8, - "learning_rate": 6.0341647426629056e-05, - "loss": 3.2553, - "step": 46015 - }, - { - "epoch": 0.8, - "learning_rate": 6.029217273889672e-05, - "loss": 3.1414, - "step": 46020 - }, - { - "epoch": 0.8, - "learning_rate": 6.0242716075964424e-05, - "loss": 3.1521, - "step": 46025 - }, - { - "epoch": 0.8, - "learning_rate": 6.0193277441551244e-05, - "loss": 3.2215, - "step": 46030 - }, - { - "epoch": 0.8, - "learning_rate": 6.014385683937457e-05, - "loss": 3.1964, - "step": 46035 - }, - { - "epoch": 0.8, - "learning_rate": 6.009445427315066e-05, - "loss": 2.9809, - "step": 46040 - }, - { - "epoch": 0.8, - "learning_rate": 6.0045069746594275e-05, - "loss": 3.1355, - "step": 46045 - }, - { - "epoch": 0.8, - "learning_rate": 5.999570326341888e-05, - "loss": 3.1077, - "step": 46050 - }, - { - "epoch": 0.8, - "learning_rate": 5.994635482733654e-05, - "loss": 3.167, - "step": 46055 - }, - { - "epoch": 0.8, - "learning_rate": 5.989702444205812e-05, - "loss": 3.2413, - "step": 46060 - }, - { - "epoch": 0.8, - "learning_rate": 5.9847712111292946e-05, - "loss": 3.1135, - "step": 46065 - }, - { - "epoch": 0.8, - "learning_rate": 5.9798417838749094e-05, - "loss": 3.2011, - "step": 46070 - }, - { - "epoch": 0.8, - "learning_rate": 5.974914162813319e-05, - "loss": 3.1403, - "step": 46075 - }, - { - "epoch": 0.8, - "learning_rate": 5.969988348315048e-05, - "loss": 3.1994, - "step": 46080 - }, - { - "epoch": 0.8, - "learning_rate": 5.965064340750513e-05, - "loss": 3.1759, - "step": 46085 - }, - { - "epoch": 0.8, - "learning_rate": 5.960142140489967e-05, - "loss": 3.1331, - "step": 46090 - }, - { - "epoch": 0.8, - "learning_rate": 5.955221747903512e-05, - "loss": 3.2708, - "step": 46095 - }, - { - "epoch": 0.8, - "learning_rate": 5.9503031633611654e-05, - "loss": 3.103, - "step": 46100 - }, - { - "epoch": 0.8, - "learning_rate": 5.94538638723276e-05, - "loss": 3.1251, - "step": 46105 - }, - { - "epoch": 0.8, - "learning_rate": 5.9404714198880275e-05, - "loss": 3.1968, - "step": 46110 - }, - { - "epoch": 0.8, - "learning_rate": 5.9355582616965425e-05, - "loss": 3.0474, - "step": 46115 - }, - { - "epoch": 0.8, - "learning_rate": 5.9306469130277456e-05, - "loss": 3.168, - "step": 46120 - }, - { - "epoch": 0.8, - "learning_rate": 5.925737374250949e-05, - "loss": 3.1837, - "step": 46125 - }, - { - "epoch": 0.8, - "learning_rate": 5.920829645735322e-05, - "loss": 3.1468, - "step": 46130 - }, - { - "epoch": 0.8, - "learning_rate": 5.9159237278498947e-05, - "loss": 3.0686, - "step": 46135 - }, - { - "epoch": 0.8, - "learning_rate": 5.911019620963582e-05, - "loss": 3.1152, - "step": 46140 - }, - { - "epoch": 0.8, - "learning_rate": 5.906117325445141e-05, - "loss": 3.2782, - "step": 46145 - }, - { - "epoch": 0.8, - "learning_rate": 5.901216841663194e-05, - "loss": 3.089, - "step": 46150 - }, - { - "epoch": 0.8, - "learning_rate": 5.89631816998624e-05, - "loss": 3.1593, - "step": 46155 - }, - { - "epoch": 0.8, - "learning_rate": 5.891421310782621e-05, - "loss": 3.2204, - "step": 46160 - }, - { - "epoch": 0.8, - "learning_rate": 5.886526264420573e-05, - "loss": 3.1318, - "step": 46165 - }, - { - "epoch": 0.8, - "learning_rate": 5.881633031268179e-05, - "loss": 3.1237, - "step": 46170 - }, - { - "epoch": 0.8, - "learning_rate": 5.876741611693361e-05, - "loss": 3.0531, - "step": 46175 - }, - { - "epoch": 0.8, - "learning_rate": 5.8718520060639494e-05, - "loss": 3.1773, - "step": 46180 - }, - { - "epoch": 0.8, - "learning_rate": 5.866964214747615e-05, - "loss": 3.1155, - "step": 46185 - }, - { - "epoch": 0.8, - "learning_rate": 5.8620782381118826e-05, - "loss": 3.1278, - "step": 46190 - }, - { - "epoch": 0.8, - "learning_rate": 5.8571940765241755e-05, - "loss": 3.0726, - "step": 46195 - }, - { - "epoch": 0.8, - "learning_rate": 5.85231173035173e-05, - "loss": 3.2045, - "step": 46200 - }, - { - "epoch": 0.8, - "learning_rate": 5.8474311999616954e-05, - "loss": 3.1445, - "step": 46205 - }, - { - "epoch": 0.8, - "learning_rate": 5.842552485721055e-05, - "loss": 3.2156, - "step": 46210 - }, - { - "epoch": 0.8, - "learning_rate": 5.8376755879966554e-05, - "loss": 3.1789, - "step": 46215 - }, - { - "epoch": 0.8, - "learning_rate": 5.8328005071552364e-05, - "loss": 3.1581, - "step": 46220 - }, - { - "epoch": 0.8, - "learning_rate": 5.827927243563348e-05, - "loss": 3.2274, - "step": 46225 - }, - { - "epoch": 0.8, - "learning_rate": 5.823055797587458e-05, - "loss": 3.0204, - "step": 46230 - }, - { - "epoch": 0.8, - "learning_rate": 5.8181861695938695e-05, - "loss": 3.1658, - "step": 46235 - }, - { - "epoch": 0.8, - "learning_rate": 5.813318359948742e-05, - "loss": 3.0678, - "step": 46240 - }, - { - "epoch": 0.8, - "learning_rate": 5.8084523690181336e-05, - "loss": 3.1534, - "step": 46245 - }, - { - "epoch": 0.8, - "learning_rate": 5.803588197167922e-05, - "loss": 3.1554, - "step": 46250 - }, - { - "epoch": 0.8, - "learning_rate": 5.798725844763864e-05, - "loss": 3.1348, - "step": 46255 - }, - { - "epoch": 0.8, - "learning_rate": 5.793865312171604e-05, - "loss": 3.1477, - "step": 46260 - }, - { - "epoch": 0.8, - "learning_rate": 5.7890065997566126e-05, - "loss": 3.2401, - "step": 46265 - }, - { - "epoch": 0.8, - "learning_rate": 5.784149707884248e-05, - "loss": 3.2408, - "step": 46270 - }, - { - "epoch": 0.8, - "learning_rate": 5.779294636919721e-05, - "loss": 3.1458, - "step": 46275 - }, - { - "epoch": 0.8, - "learning_rate": 5.774441387228104e-05, - "loss": 3.0876, - "step": 46280 - }, - { - "epoch": 0.8, - "learning_rate": 5.769589959174343e-05, - "loss": 3.081, - "step": 46285 - }, - { - "epoch": 0.8, - "learning_rate": 5.7647403531232415e-05, - "loss": 3.1654, - "step": 46290 - }, - { - "epoch": 0.8, - "learning_rate": 5.7598925694394605e-05, - "loss": 3.1191, - "step": 46295 - }, - { - "epoch": 0.8, - "learning_rate": 5.755046608487528e-05, - "loss": 3.1663, - "step": 46300 - }, - { - "epoch": 0.8, - "learning_rate": 5.750202470631833e-05, - "loss": 3.1126, - "step": 46305 - }, - { - "epoch": 0.8, - "learning_rate": 5.74536015623664e-05, - "loss": 3.1349, - "step": 46310 - }, - { - "epoch": 0.8, - "learning_rate": 5.740519665666059e-05, - "loss": 3.1206, - "step": 46315 - }, - { - "epoch": 0.8, - "learning_rate": 5.7356809992840715e-05, - "loss": 3.078, - "step": 46320 - }, - { - "epoch": 0.8, - "learning_rate": 5.730844157454523e-05, - "loss": 3.0895, - "step": 46325 - }, - { - "epoch": 0.8, - "learning_rate": 5.7260091405411124e-05, - "loss": 3.2058, - "step": 46330 - }, - { - "epoch": 0.8, - "learning_rate": 5.721175948907406e-05, - "loss": 3.2039, - "step": 46335 - }, - { - "epoch": 0.8, - "learning_rate": 5.71634458291685e-05, - "loss": 3.1625, - "step": 46340 - }, - { - "epoch": 0.8, - "learning_rate": 5.71151504293273e-05, - "loss": 3.1704, - "step": 46345 - }, - { - "epoch": 0.8, - "learning_rate": 5.706687329318202e-05, - "loss": 3.112, - "step": 46350 - }, - { - "epoch": 0.8, - "learning_rate": 5.701861442436285e-05, - "loss": 3.1327, - "step": 46355 - }, - { - "epoch": 0.8, - "learning_rate": 5.6970373826498536e-05, - "loss": 3.1424, - "step": 46360 - }, - { - "epoch": 0.8, - "learning_rate": 5.692215150321667e-05, - "loss": 3.1835, - "step": 46365 - }, - { - "epoch": 0.8, - "learning_rate": 5.6873947458143255e-05, - "loss": 3.0898, - "step": 46370 - }, - { - "epoch": 0.8, - "learning_rate": 5.682576169490302e-05, - "loss": 3.0496, - "step": 46375 - }, - { - "epoch": 0.8, - "learning_rate": 5.6777594217119224e-05, - "loss": 3.1112, - "step": 46380 - }, - { - "epoch": 0.8, - "learning_rate": 5.672944502841379e-05, - "loss": 3.201, - "step": 46385 - }, - { - "epoch": 0.8, - "learning_rate": 5.668131413240744e-05, - "loss": 3.0926, - "step": 46390 - }, - { - "epoch": 0.8, - "learning_rate": 5.6633201532719256e-05, - "loss": 3.1919, - "step": 46395 - }, - { - "epoch": 0.8, - "learning_rate": 5.6585107232967095e-05, - "loss": 3.2124, - "step": 46400 - }, - { - "epoch": 0.8, - "learning_rate": 5.653703123676738e-05, - "loss": 3.2094, - "step": 46405 - }, - { - "epoch": 0.8, - "learning_rate": 5.6488973547735184e-05, - "loss": 3.0477, - "step": 46410 - }, - { - "epoch": 0.8, - "learning_rate": 5.6440934169484106e-05, - "loss": 3.1221, - "step": 46415 - }, - { - "epoch": 0.8, - "learning_rate": 5.639291310562667e-05, - "loss": 3.0849, - "step": 46420 - }, - { - "epoch": 0.8, - "learning_rate": 5.634491035977371e-05, - "loss": 3.053, - "step": 46425 - }, - { - "epoch": 0.8, - "learning_rate": 5.6296925935534746e-05, - "loss": 3.1511, - "step": 46430 - }, - { - "epoch": 0.8, - "learning_rate": 5.624895983651801e-05, - "loss": 3.1053, - "step": 46435 - }, - { - "epoch": 0.8, - "learning_rate": 5.620101206633021e-05, - "loss": 3.2083, - "step": 46440 - }, - { - "epoch": 0.8, - "learning_rate": 5.6153082628576937e-05, - "loss": 3.1903, - "step": 46445 - }, - { - "epoch": 0.8, - "learning_rate": 5.610517152686212e-05, - "loss": 3.1463, - "step": 46450 - }, - { - "epoch": 0.8, - "learning_rate": 5.605727876478852e-05, - "loss": 3.1161, - "step": 46455 - }, - { - "epoch": 0.8, - "learning_rate": 5.600940434595733e-05, - "loss": 3.1253, - "step": 46460 - }, - { - "epoch": 0.8, - "learning_rate": 5.596154827396842e-05, - "loss": 3.2649, - "step": 46465 - }, - { - "epoch": 0.8, - "learning_rate": 5.591371055242049e-05, - "loss": 3.1203, - "step": 46470 - }, - { - "epoch": 0.8, - "learning_rate": 5.5865891184910706e-05, - "loss": 3.1973, - "step": 46475 - }, - { - "epoch": 0.8, - "learning_rate": 5.581809017503457e-05, - "loss": 3.196, - "step": 46480 - }, - { - "epoch": 0.8, - "learning_rate": 5.5770307526386703e-05, - "loss": 3.0937, - "step": 46485 - }, - { - "epoch": 0.8, - "learning_rate": 5.572254324256004e-05, - "loss": 3.1002, - "step": 46490 - }, - { - "epoch": 0.8, - "learning_rate": 5.567479732714616e-05, - "loss": 3.2265, - "step": 46495 - }, - { - "epoch": 0.8, - "learning_rate": 5.5627069783735534e-05, - "loss": 3.0773, - "step": 46500 - }, - { - "epoch": 0.8, - "eval_loss": 3.175149440765381, - "eval_runtime": 150.553, - "eval_samples_per_second": 12.228, - "eval_steps_per_second": 0.77, - "step": 46500 - }, - { - "epoch": 0.8, - "learning_rate": 5.5579360615916724e-05, - "loss": 3.1069, - "step": 46505 - }, - { - "epoch": 0.8, - "learning_rate": 5.5531669827277426e-05, - "loss": 3.1709, - "step": 46510 - }, - { - "epoch": 0.8, - "learning_rate": 5.548399742140367e-05, - "loss": 3.0737, - "step": 46515 - }, - { - "epoch": 0.81, - "learning_rate": 5.543634340188012e-05, - "loss": 3.1628, - "step": 46520 - }, - { - "epoch": 0.81, - "learning_rate": 5.538870777229032e-05, - "loss": 3.1883, - "step": 46525 - }, - { - "epoch": 0.81, - "learning_rate": 5.534109053621596e-05, - "loss": 3.1801, - "step": 46530 - }, - { - "epoch": 0.81, - "learning_rate": 5.5293491697237776e-05, - "loss": 3.0952, - "step": 46535 - }, - { - "epoch": 0.81, - "learning_rate": 5.524591125893492e-05, - "loss": 3.139, - "step": 46540 - }, - { - "epoch": 0.81, - "learning_rate": 5.519834922488511e-05, - "loss": 3.0904, - "step": 46545 - }, - { - "epoch": 0.81, - "learning_rate": 5.5150805598665026e-05, - "loss": 3.0953, - "step": 46550 - }, - { - "epoch": 0.81, - "learning_rate": 5.510328038384944e-05, - "loss": 3.1868, - "step": 46555 - }, - { - "epoch": 0.81, - "learning_rate": 5.505577358401201e-05, - "loss": 3.1019, - "step": 46560 - }, - { - "epoch": 0.81, - "learning_rate": 5.500828520272519e-05, - "loss": 3.0841, - "step": 46565 - }, - { - "epoch": 0.81, - "learning_rate": 5.496081524355973e-05, - "loss": 3.1502, - "step": 46570 - }, - { - "epoch": 0.81, - "learning_rate": 5.491336371008508e-05, - "loss": 3.1491, - "step": 46575 - }, - { - "epoch": 0.81, - "learning_rate": 5.486593060586961e-05, - "loss": 3.0894, - "step": 46580 - }, - { - "epoch": 0.81, - "learning_rate": 5.481851593447968e-05, - "loss": 3.1844, - "step": 46585 - }, - { - "epoch": 0.81, - "learning_rate": 5.4771119699480926e-05, - "loss": 3.1282, - "step": 46590 - }, - { - "epoch": 0.81, - "learning_rate": 5.472374190443717e-05, - "loss": 3.1935, - "step": 46595 - }, - { - "epoch": 0.81, - "learning_rate": 5.467638255291092e-05, - "loss": 2.9922, - "step": 46600 - }, - { - "epoch": 0.81, - "learning_rate": 5.462904164846361e-05, - "loss": 3.0834, - "step": 46605 - }, - { - "epoch": 0.81, - "learning_rate": 5.4581719194654715e-05, - "loss": 3.0396, - "step": 46610 - }, - { - "epoch": 0.81, - "learning_rate": 5.453441519504286e-05, - "loss": 3.1753, - "step": 46615 - }, - { - "epoch": 0.81, - "learning_rate": 5.448712965318496e-05, - "loss": 3.0239, - "step": 46620 - }, - { - "epoch": 0.81, - "learning_rate": 5.443986257263664e-05, - "loss": 3.1184, - "step": 46625 - }, - { - "epoch": 0.81, - "learning_rate": 5.4392613956952325e-05, - "loss": 3.0795, - "step": 46630 - }, - { - "epoch": 0.81, - "learning_rate": 5.4345383809684605e-05, - "loss": 3.149, - "step": 46635 - }, - { - "epoch": 0.81, - "learning_rate": 5.429817213438504e-05, - "loss": 3.1892, - "step": 46640 - }, - { - "epoch": 0.81, - "learning_rate": 5.425097893460376e-05, - "loss": 3.1876, - "step": 46645 - }, - { - "epoch": 0.81, - "learning_rate": 5.4203804213889455e-05, - "loss": 3.1224, - "step": 46650 - }, - { - "epoch": 0.81, - "learning_rate": 5.415664797578939e-05, - "loss": 3.3148, - "step": 46655 - }, - { - "epoch": 0.81, - "learning_rate": 5.4109510223849464e-05, - "loss": 3.201, - "step": 46660 - }, - { - "epoch": 0.81, - "learning_rate": 5.40623909616141e-05, - "loss": 3.1676, - "step": 46665 - }, - { - "epoch": 0.81, - "learning_rate": 5.4015290192626606e-05, - "loss": 3.159, - "step": 46670 - }, - { - "epoch": 0.81, - "learning_rate": 5.396820792042864e-05, - "loss": 3.0609, - "step": 46675 - }, - { - "epoch": 0.81, - "learning_rate": 5.392114414856058e-05, - "loss": 3.1752, - "step": 46680 - }, - { - "epoch": 0.81, - "learning_rate": 5.3874098880561324e-05, - "loss": 3.1882, - "step": 46685 - }, - { - "epoch": 0.81, - "learning_rate": 5.382707211996844e-05, - "loss": 3.1596, - "step": 46690 - }, - { - "epoch": 0.81, - "learning_rate": 5.378006387031807e-05, - "loss": 3.0796, - "step": 46695 - }, - { - "epoch": 0.81, - "learning_rate": 5.373307413514508e-05, - "loss": 3.1536, - "step": 46700 - }, - { - "epoch": 0.81, - "learning_rate": 5.368610291798285e-05, - "loss": 3.0925, - "step": 46705 - }, - { - "epoch": 0.81, - "learning_rate": 5.363915022236332e-05, - "loss": 3.1855, - "step": 46710 - }, - { - "epoch": 0.81, - "learning_rate": 5.359221605181711e-05, - "loss": 3.1994, - "step": 46715 - }, - { - "epoch": 0.81, - "learning_rate": 5.354530040987337e-05, - "loss": 3.094, - "step": 46720 - }, - { - "epoch": 0.81, - "learning_rate": 5.3498403300060064e-05, - "loss": 3.1899, - "step": 46725 - }, - { - "epoch": 0.81, - "learning_rate": 5.34515247259035e-05, - "loss": 3.2052, - "step": 46730 - }, - { - "epoch": 0.81, - "learning_rate": 5.340466469092877e-05, - "loss": 3.0525, - "step": 46735 - }, - { - "epoch": 0.81, - "learning_rate": 5.3357823198659464e-05, - "loss": 3.185, - "step": 46740 - }, - { - "epoch": 0.81, - "learning_rate": 5.3311000252617756e-05, - "loss": 3.2134, - "step": 46745 - }, - { - "epoch": 0.81, - "learning_rate": 5.3264195856324645e-05, - "loss": 3.117, - "step": 46750 - }, - { - "epoch": 0.81, - "learning_rate": 5.321741001329955e-05, - "loss": 3.2239, - "step": 46755 - }, - { - "epoch": 0.81, - "learning_rate": 5.317064272706038e-05, - "loss": 3.0463, - "step": 46760 - }, - { - "epoch": 0.81, - "learning_rate": 5.312389400112392e-05, - "loss": 3.1178, - "step": 46765 - }, - { - "epoch": 0.81, - "learning_rate": 5.307716383900545e-05, - "loss": 3.1229, - "step": 46770 - }, - { - "epoch": 0.81, - "learning_rate": 5.303045224421872e-05, - "loss": 3.1428, - "step": 46775 - }, - { - "epoch": 0.81, - "learning_rate": 5.298375922027642e-05, - "loss": 3.1259, - "step": 46780 - }, - { - "epoch": 0.81, - "learning_rate": 5.293708477068934e-05, - "loss": 3.1495, - "step": 46785 - }, - { - "epoch": 0.81, - "learning_rate": 5.289042889896738e-05, - "loss": 3.2403, - "step": 46790 - }, - { - "epoch": 0.81, - "learning_rate": 5.2843791608618745e-05, - "loss": 3.1276, - "step": 46795 - }, - { - "epoch": 0.81, - "learning_rate": 5.279717290315029e-05, - "loss": 3.0485, - "step": 46800 - }, - { - "epoch": 0.81, - "learning_rate": 5.275057278606756e-05, - "loss": 3.1209, - "step": 46805 - }, - { - "epoch": 0.81, - "learning_rate": 5.270399126087466e-05, - "loss": 3.1228, - "step": 46810 - }, - { - "epoch": 0.81, - "learning_rate": 5.265742833107425e-05, - "loss": 3.1467, - "step": 46815 - }, - { - "epoch": 0.81, - "learning_rate": 5.2610884000167595e-05, - "loss": 3.1517, - "step": 46820 - }, - { - "epoch": 0.81, - "learning_rate": 5.256435827165454e-05, - "loss": 3.2064, - "step": 46825 - }, - { - "epoch": 0.81, - "learning_rate": 5.251785114903374e-05, - "loss": 3.0334, - "step": 46830 - }, - { - "epoch": 0.81, - "learning_rate": 5.247136263580227e-05, - "loss": 3.0695, - "step": 46835 - }, - { - "epoch": 0.81, - "learning_rate": 5.242489273545564e-05, - "loss": 3.1541, - "step": 46840 - }, - { - "epoch": 0.81, - "learning_rate": 5.237844145148834e-05, - "loss": 3.1188, - "step": 46845 - }, - { - "epoch": 0.81, - "learning_rate": 5.233200878739319e-05, - "loss": 3.1888, - "step": 46850 - }, - { - "epoch": 0.81, - "learning_rate": 5.228559474666162e-05, - "loss": 3.1343, - "step": 46855 - }, - { - "epoch": 0.81, - "learning_rate": 5.2239199332783975e-05, - "loss": 3.0589, - "step": 46860 - }, - { - "epoch": 0.81, - "learning_rate": 5.2192822549248634e-05, - "loss": 3.1058, - "step": 46865 - }, - { - "epoch": 0.81, - "learning_rate": 5.214646439954311e-05, - "loss": 3.2759, - "step": 46870 - }, - { - "epoch": 0.81, - "learning_rate": 5.210012488715324e-05, - "loss": 3.1772, - "step": 46875 - }, - { - "epoch": 0.81, - "learning_rate": 5.205380401556344e-05, - "loss": 3.0871, - "step": 46880 - }, - { - "epoch": 0.81, - "learning_rate": 5.200750178825702e-05, - "loss": 3.2545, - "step": 46885 - }, - { - "epoch": 0.81, - "learning_rate": 5.1961218208715406e-05, - "loss": 3.2528, - "step": 46890 - }, - { - "epoch": 0.81, - "learning_rate": 5.191495328041904e-05, - "loss": 3.072, - "step": 46895 - }, - { - "epoch": 0.81, - "learning_rate": 5.18687070068468e-05, - "loss": 3.1296, - "step": 46900 - }, - { - "epoch": 0.81, - "learning_rate": 5.1822479391476046e-05, - "loss": 3.1255, - "step": 46905 - }, - { - "epoch": 0.81, - "learning_rate": 5.1776270437783096e-05, - "loss": 3.1293, - "step": 46910 - }, - { - "epoch": 0.81, - "learning_rate": 5.17300801492424e-05, - "loss": 3.0896, - "step": 46915 - }, - { - "epoch": 0.81, - "learning_rate": 5.168390852932726e-05, - "loss": 3.0692, - "step": 46920 - }, - { - "epoch": 0.81, - "learning_rate": 5.163775558150968e-05, - "loss": 3.1975, - "step": 46925 - }, - { - "epoch": 0.81, - "learning_rate": 5.159162130926002e-05, - "loss": 3.1942, - "step": 46930 - }, - { - "epoch": 0.81, - "learning_rate": 5.154550571604739e-05, - "loss": 3.0869, - "step": 46935 - }, - { - "epoch": 0.81, - "learning_rate": 5.1499408805339396e-05, - "loss": 3.0404, - "step": 46940 - }, - { - "epoch": 0.81, - "learning_rate": 5.145333058060227e-05, - "loss": 3.2056, - "step": 46945 - }, - { - "epoch": 0.81, - "learning_rate": 5.140727104530095e-05, - "loss": 3.1984, - "step": 46950 - }, - { - "epoch": 0.81, - "learning_rate": 5.136123020289885e-05, - "loss": 3.127, - "step": 46955 - }, - { - "epoch": 0.81, - "learning_rate": 5.131520805685795e-05, - "loss": 3.0707, - "step": 46960 - }, - { - "epoch": 0.81, - "learning_rate": 5.126920461063894e-05, - "loss": 3.0808, - "step": 46965 - }, - { - "epoch": 0.81, - "learning_rate": 5.122321986770095e-05, - "loss": 3.1137, - "step": 46970 - }, - { - "epoch": 0.81, - "learning_rate": 5.117725383150192e-05, - "loss": 3.1588, - "step": 46975 - }, - { - "epoch": 0.81, - "learning_rate": 5.113130650549822e-05, - "loss": 3.1079, - "step": 46980 - }, - { - "epoch": 0.81, - "learning_rate": 5.108537789314484e-05, - "loss": 3.2413, - "step": 46985 - }, - { - "epoch": 0.81, - "learning_rate": 5.1039467997895364e-05, - "loss": 3.3086, - "step": 46990 - }, - { - "epoch": 0.81, - "learning_rate": 5.0993576823202035e-05, - "loss": 3.1472, - "step": 46995 - }, - { - "epoch": 0.81, - "learning_rate": 5.094770437251547e-05, - "loss": 3.0839, - "step": 47000 - }, - { - "epoch": 0.81, - "eval_loss": 3.1730329990386963, - "eval_runtime": 151.5758, - "eval_samples_per_second": 12.146, - "eval_steps_per_second": 0.765, - "step": 47000 - }, - { - "epoch": 0.81, - "learning_rate": 5.0901850649285236e-05, - "loss": 3.1681, - "step": 47005 - }, - { - "epoch": 0.81, - "learning_rate": 5.085601565695926e-05, - "loss": 3.1697, - "step": 47010 - }, - { - "epoch": 0.81, - "learning_rate": 5.081019939898402e-05, - "loss": 3.2027, - "step": 47015 - }, - { - "epoch": 0.81, - "learning_rate": 5.076440187880473e-05, - "loss": 3.2215, - "step": 47020 - }, - { - "epoch": 0.81, - "learning_rate": 5.071862309986503e-05, - "loss": 3.1788, - "step": 47025 - }, - { - "epoch": 0.81, - "learning_rate": 5.0672863065607404e-05, - "loss": 3.1206, - "step": 47030 - }, - { - "epoch": 0.81, - "learning_rate": 5.0627121779472704e-05, - "loss": 3.1373, - "step": 47035 - }, - { - "epoch": 0.81, - "learning_rate": 5.0581399244900434e-05, - "loss": 3.2021, - "step": 47040 - }, - { - "epoch": 0.81, - "learning_rate": 5.0535695465328653e-05, - "loss": 3.141, - "step": 47045 - }, - { - "epoch": 0.81, - "learning_rate": 5.049001044419404e-05, - "loss": 3.1628, - "step": 47050 - }, - { - "epoch": 0.81, - "learning_rate": 5.044434418493202e-05, - "loss": 3.0681, - "step": 47055 - }, - { - "epoch": 0.81, - "learning_rate": 5.0398696690976334e-05, - "loss": 3.1685, - "step": 47060 - }, - { - "epoch": 0.81, - "learning_rate": 5.0353067965759455e-05, - "loss": 3.3124, - "step": 47065 - }, - { - "epoch": 0.81, - "learning_rate": 5.0307458012712485e-05, - "loss": 3.1279, - "step": 47070 - }, - { - "epoch": 0.81, - "learning_rate": 5.0261866835265e-05, - "loss": 3.0975, - "step": 47075 - }, - { - "epoch": 0.81, - "learning_rate": 5.021629443684517e-05, - "loss": 3.1015, - "step": 47080 - }, - { - "epoch": 0.81, - "learning_rate": 5.017074082087994e-05, - "loss": 3.0979, - "step": 47085 - }, - { - "epoch": 0.81, - "learning_rate": 5.012520599079466e-05, - "loss": 3.2613, - "step": 47090 - }, - { - "epoch": 0.82, - "learning_rate": 5.007968995001329e-05, - "loss": 3.0582, - "step": 47095 - }, - { - "epoch": 0.82, - "learning_rate": 5.003419270195843e-05, - "loss": 3.2359, - "step": 47100 - }, - { - "epoch": 0.82, - "learning_rate": 4.998871425005114e-05, - "loss": 3.1534, - "step": 47105 - }, - { - "epoch": 0.82, - "learning_rate": 4.994325459771132e-05, - "loss": 3.101, - "step": 47110 - }, - { - "epoch": 0.82, - "learning_rate": 4.989781374835724e-05, - "loss": 3.2268, - "step": 47115 - }, - { - "epoch": 0.82, - "learning_rate": 4.985239170540579e-05, - "loss": 3.0948, - "step": 47120 - }, - { - "epoch": 0.82, - "learning_rate": 4.9806988472272544e-05, - "loss": 3.1097, - "step": 47125 - }, - { - "epoch": 0.82, - "learning_rate": 4.976160405237144e-05, - "loss": 3.0346, - "step": 47130 - }, - { - "epoch": 0.82, - "learning_rate": 4.971623844911534e-05, - "loss": 3.1732, - "step": 47135 - }, - { - "epoch": 0.82, - "learning_rate": 4.967089166591549e-05, - "loss": 3.0997, - "step": 47140 - }, - { - "epoch": 0.82, - "learning_rate": 4.9625563706181526e-05, - "loss": 3.1634, - "step": 47145 - }, - { - "epoch": 0.82, - "learning_rate": 4.958025457332212e-05, - "loss": 3.1382, - "step": 47150 - }, - { - "epoch": 0.82, - "learning_rate": 4.953496427074418e-05, - "loss": 3.159, - "step": 47155 - }, - { - "epoch": 0.82, - "learning_rate": 4.948969280185322e-05, - "loss": 2.9929, - "step": 47160 - }, - { - "epoch": 0.82, - "learning_rate": 4.944444017005368e-05, - "loss": 3.0907, - "step": 47165 - }, - { - "epoch": 0.82, - "learning_rate": 4.9399206378748e-05, - "loss": 3.2111, - "step": 47170 - }, - { - "epoch": 0.82, - "learning_rate": 4.935399143133777e-05, - "loss": 3.1707, - "step": 47175 - }, - { - "epoch": 0.82, - "learning_rate": 4.930879533122286e-05, - "loss": 3.1829, - "step": 47180 - }, - { - "epoch": 0.82, - "learning_rate": 4.9263618081801695e-05, - "loss": 3.0983, - "step": 47185 - }, - { - "epoch": 0.82, - "learning_rate": 4.921845968647158e-05, - "loss": 3.0433, - "step": 47190 - }, - { - "epoch": 0.82, - "learning_rate": 4.9173320148628004e-05, - "loss": 3.1361, - "step": 47195 - }, - { - "epoch": 0.82, - "learning_rate": 4.9128199471665186e-05, - "loss": 3.1164, - "step": 47200 - }, - { - "epoch": 0.82, - "learning_rate": 4.9083097658976145e-05, - "loss": 2.9894, - "step": 47205 - }, - { - "epoch": 0.82, - "learning_rate": 4.9038014713952266e-05, - "loss": 3.1765, - "step": 47210 - }, - { - "epoch": 0.82, - "learning_rate": 4.8992950639983475e-05, - "loss": 3.1134, - "step": 47215 - }, - { - "epoch": 0.82, - "learning_rate": 4.8947905440458454e-05, - "loss": 3.2183, - "step": 47220 - }, - { - "epoch": 0.82, - "learning_rate": 4.89028791187642e-05, - "loss": 3.1325, - "step": 47225 - }, - { - "epoch": 0.82, - "learning_rate": 4.885787167828671e-05, - "loss": 3.1272, - "step": 47230 - }, - { - "epoch": 0.82, - "learning_rate": 4.881288312241016e-05, - "loss": 3.2502, - "step": 47235 - }, - { - "epoch": 0.82, - "learning_rate": 4.876791345451746e-05, - "loss": 3.1172, - "step": 47240 - }, - { - "epoch": 0.82, - "learning_rate": 4.8722962677990166e-05, - "loss": 3.0945, - "step": 47245 - }, - { - "epoch": 0.82, - "learning_rate": 4.86780307962082e-05, - "loss": 3.0672, - "step": 47250 - }, - { - "epoch": 0.82, - "learning_rate": 4.8633117812550405e-05, - "loss": 3.1783, - "step": 47255 - }, - { - "epoch": 0.82, - "learning_rate": 4.858822373039391e-05, - "loss": 3.0584, - "step": 47260 - }, - { - "epoch": 0.82, - "learning_rate": 4.854334855311447e-05, - "loss": 3.2328, - "step": 47265 - }, - { - "epoch": 0.82, - "learning_rate": 4.849849228408663e-05, - "loss": 3.065, - "step": 47270 - }, - { - "epoch": 0.82, - "learning_rate": 4.8453654926683197e-05, - "loss": 3.101, - "step": 47275 - }, - { - "epoch": 0.82, - "learning_rate": 4.8408836484275694e-05, - "loss": 3.1174, - "step": 47280 - }, - { - "epoch": 0.82, - "learning_rate": 4.8364036960234366e-05, - "loss": 3.1366, - "step": 47285 - }, - { - "epoch": 0.82, - "learning_rate": 4.831925635792785e-05, - "loss": 3.1351, - "step": 47290 - }, - { - "epoch": 0.82, - "learning_rate": 4.827449468072342e-05, - "loss": 3.2125, - "step": 47295 - }, - { - "epoch": 0.82, - "learning_rate": 4.822975193198688e-05, - "loss": 3.1998, - "step": 47300 - }, - { - "epoch": 0.82, - "learning_rate": 4.818502811508265e-05, - "loss": 3.0836, - "step": 47305 - }, - { - "epoch": 0.82, - "learning_rate": 4.8140323233373854e-05, - "loss": 2.9659, - "step": 47310 - }, - { - "epoch": 0.82, - "learning_rate": 4.809563729022196e-05, - "loss": 3.2244, - "step": 47315 - }, - { - "epoch": 0.82, - "learning_rate": 4.8050970288987187e-05, - "loss": 3.1523, - "step": 47320 - }, - { - "epoch": 0.82, - "learning_rate": 4.8006322233028205e-05, - "loss": 3.178, - "step": 47325 - }, - { - "epoch": 0.82, - "learning_rate": 4.7961693125702284e-05, - "loss": 3.0367, - "step": 47330 - }, - { - "epoch": 0.82, - "learning_rate": 4.791708297036544e-05, - "loss": 3.0901, - "step": 47335 - }, - { - "epoch": 0.82, - "learning_rate": 4.787249177037202e-05, - "loss": 3.1631, - "step": 47340 - }, - { - "epoch": 0.82, - "learning_rate": 4.782791952907512e-05, - "loss": 3.0768, - "step": 47345 - }, - { - "epoch": 0.82, - "learning_rate": 4.778336624982632e-05, - "loss": 3.0922, - "step": 47350 - }, - { - "epoch": 0.82, - "learning_rate": 4.7738831935975766e-05, - "loss": 2.9787, - "step": 47355 - }, - { - "epoch": 0.82, - "learning_rate": 4.769431659087215e-05, - "loss": 3.1878, - "step": 47360 - }, - { - "epoch": 0.82, - "learning_rate": 4.7649820217862966e-05, - "loss": 3.2068, - "step": 47365 - }, - { - "epoch": 0.82, - "learning_rate": 4.760534282029403e-05, - "loss": 3.2481, - "step": 47370 - }, - { - "epoch": 0.82, - "learning_rate": 4.7560884401509816e-05, - "loss": 3.1639, - "step": 47375 - }, - { - "epoch": 0.82, - "learning_rate": 4.7516444964853365e-05, - "loss": 3.1155, - "step": 47380 - }, - { - "epoch": 0.82, - "learning_rate": 4.7472024513666205e-05, - "loss": 3.1491, - "step": 47385 - }, - { - "epoch": 0.82, - "learning_rate": 4.742762305128871e-05, - "loss": 3.0542, - "step": 47390 - }, - { - "epoch": 0.82, - "learning_rate": 4.738324058105953e-05, - "loss": 3.1943, - "step": 47395 - }, - { - "epoch": 0.82, - "learning_rate": 4.733887710631601e-05, - "loss": 3.1193, - "step": 47400 - }, - { - "epoch": 0.82, - "learning_rate": 4.72945326303941e-05, - "loss": 3.2377, - "step": 47405 - }, - { - "epoch": 0.82, - "learning_rate": 4.725020715662814e-05, - "loss": 3.1782, - "step": 47410 - }, - { - "epoch": 0.82, - "learning_rate": 4.720590068835136e-05, - "loss": 3.1825, - "step": 47415 - }, - { - "epoch": 0.82, - "learning_rate": 4.716161322889537e-05, - "loss": 3.1946, - "step": 47420 - }, - { - "epoch": 0.82, - "learning_rate": 4.711734478159016e-05, - "loss": 3.1585, - "step": 47425 - }, - { - "epoch": 0.82, - "learning_rate": 4.707309534976468e-05, - "loss": 3.1594, - "step": 47430 - }, - { - "epoch": 0.82, - "learning_rate": 4.702886493674619e-05, - "loss": 3.0485, - "step": 47435 - }, - { - "epoch": 0.82, - "learning_rate": 4.698465354586054e-05, - "loss": 3.1429, - "step": 47440 - }, - { - "epoch": 0.82, - "learning_rate": 4.69404611804324e-05, - "loss": 3.2192, - "step": 47445 - }, - { - "epoch": 0.82, - "learning_rate": 4.6896287843784506e-05, - "loss": 3.2076, - "step": 47450 - }, - { - "epoch": 0.82, - "learning_rate": 4.685213353923875e-05, - "loss": 2.9989, - "step": 47455 - }, - { - "epoch": 0.82, - "learning_rate": 4.680799827011515e-05, - "loss": 3.2398, - "step": 47460 - }, - { - "epoch": 0.82, - "learning_rate": 4.676388203973242e-05, - "loss": 3.1911, - "step": 47465 - }, - { - "epoch": 0.82, - "learning_rate": 4.6719784851408105e-05, - "loss": 3.0343, - "step": 47470 - }, - { - "epoch": 0.82, - "learning_rate": 4.6675706708457775e-05, - "loss": 3.214, - "step": 47475 - }, - { - "epoch": 0.82, - "learning_rate": 4.663164761419613e-05, - "loss": 3.045, - "step": 47480 - }, - { - "epoch": 0.82, - "learning_rate": 4.658760757193607e-05, - "loss": 3.0966, - "step": 47485 - }, - { - "epoch": 0.82, - "learning_rate": 4.654358658498914e-05, - "loss": 3.1184, - "step": 47490 - }, - { - "epoch": 0.82, - "learning_rate": 4.649958465666568e-05, - "loss": 3.0956, - "step": 47495 - }, - { - "epoch": 0.82, - "learning_rate": 4.6455601790274224e-05, - "loss": 3.1493, - "step": 47500 - }, - { - "epoch": 0.82, - "eval_loss": 3.1702067852020264, - "eval_runtime": 150.5836, - "eval_samples_per_second": 12.226, - "eval_steps_per_second": 0.77, - "step": 47500 - }, - { - "epoch": 0.82, - "learning_rate": 4.641163798912205e-05, - "loss": 3.1356, - "step": 47505 - }, - { - "epoch": 0.82, - "learning_rate": 4.636769325651516e-05, - "loss": 3.1238, - "step": 47510 - }, - { - "epoch": 0.82, - "learning_rate": 4.63237675957579e-05, - "loss": 3.2155, - "step": 47515 - }, - { - "epoch": 0.82, - "learning_rate": 4.627986101015313e-05, - "loss": 3.1784, - "step": 47520 - }, - { - "epoch": 0.82, - "learning_rate": 4.623597350300271e-05, - "loss": 3.1229, - "step": 47525 - }, - { - "epoch": 0.82, - "learning_rate": 4.61921050776064e-05, - "loss": 3.2752, - "step": 47530 - }, - { - "epoch": 0.82, - "learning_rate": 4.61482557372631e-05, - "loss": 3.1234, - "step": 47535 - }, - { - "epoch": 0.82, - "learning_rate": 4.610442548527003e-05, - "loss": 3.0621, - "step": 47540 - }, - { - "epoch": 0.82, - "learning_rate": 4.606061432492285e-05, - "loss": 3.0858, - "step": 47545 - }, - { - "epoch": 0.82, - "learning_rate": 4.601682225951624e-05, - "loss": 3.1693, - "step": 47550 - }, - { - "epoch": 0.82, - "learning_rate": 4.597304929234279e-05, - "loss": 3.2589, - "step": 47555 - }, - { - "epoch": 0.82, - "learning_rate": 4.592929542669427e-05, - "loss": 3.1472, - "step": 47560 - }, - { - "epoch": 0.82, - "learning_rate": 4.588556066586061e-05, - "loss": 3.1025, - "step": 47565 - }, - { - "epoch": 0.82, - "learning_rate": 4.584184501313039e-05, - "loss": 3.1964, - "step": 47570 - }, - { - "epoch": 0.82, - "learning_rate": 4.5798148471791066e-05, - "loss": 3.0656, - "step": 47575 - }, - { - "epoch": 0.82, - "learning_rate": 4.575447104512812e-05, - "loss": 3.2059, - "step": 47580 - }, - { - "epoch": 0.82, - "learning_rate": 4.571081273642588e-05, - "loss": 3.1571, - "step": 47585 - }, - { - "epoch": 0.82, - "learning_rate": 4.566717354896741e-05, - "loss": 3.1968, - "step": 47590 - }, - { - "epoch": 0.82, - "learning_rate": 4.562355348603402e-05, - "loss": 3.0857, - "step": 47595 - }, - { - "epoch": 0.82, - "learning_rate": 4.5579952550905786e-05, - "loss": 3.1365, - "step": 47600 - }, - { - "epoch": 0.82, - "learning_rate": 4.5536370746861196e-05, - "loss": 3.1453, - "step": 47605 - }, - { - "epoch": 0.82, - "learning_rate": 4.549280807717737e-05, - "loss": 3.1229, - "step": 47610 - }, - { - "epoch": 0.82, - "learning_rate": 4.544926454513015e-05, - "loss": 3.1685, - "step": 47615 - }, - { - "epoch": 0.82, - "learning_rate": 4.5405740153993653e-05, - "loss": 3.1028, - "step": 47620 - }, - { - "epoch": 0.82, - "learning_rate": 4.536223490704073e-05, - "loss": 3.0121, - "step": 47625 - }, - { - "epoch": 0.82, - "learning_rate": 4.531874880754273e-05, - "loss": 3.2499, - "step": 47630 - }, - { - "epoch": 0.82, - "learning_rate": 4.527528185876953e-05, - "loss": 3.1126, - "step": 47635 - }, - { - "epoch": 0.82, - "learning_rate": 4.523183406398976e-05, - "loss": 3.1939, - "step": 47640 - }, - { - "epoch": 0.82, - "learning_rate": 4.518840542647043e-05, - "loss": 3.1388, - "step": 47645 - }, - { - "epoch": 0.82, - "learning_rate": 4.5144995949477105e-05, - "loss": 3.1516, - "step": 47650 - }, - { - "epoch": 0.82, - "learning_rate": 4.510160563627396e-05, - "loss": 3.2073, - "step": 47655 - }, - { - "epoch": 0.82, - "learning_rate": 4.5058234490123765e-05, - "loss": 3.1583, - "step": 47660 - }, - { - "epoch": 0.82, - "learning_rate": 4.5014882514287674e-05, - "loss": 3.2169, - "step": 47665 - }, - { - "epoch": 0.82, - "learning_rate": 4.4971549712025743e-05, - "loss": 3.2414, - "step": 47670 - }, - { - "epoch": 0.83, - "learning_rate": 4.492823608659627e-05, - "loss": 3.1101, - "step": 47675 - }, - { - "epoch": 0.83, - "learning_rate": 4.488494164125621e-05, - "loss": 3.0738, - "step": 47680 - }, - { - "epoch": 0.83, - "learning_rate": 4.48416663792611e-05, - "loss": 3.0769, - "step": 47685 - }, - { - "epoch": 0.83, - "learning_rate": 4.479841030386493e-05, - "loss": 3.1254, - "step": 47690 - }, - { - "epoch": 0.83, - "learning_rate": 4.47551734183205e-05, - "loss": 3.1089, - "step": 47695 - }, - { - "epoch": 0.83, - "learning_rate": 4.471195572587898e-05, - "loss": 3.1742, - "step": 47700 - }, - { - "epoch": 0.83, - "learning_rate": 4.466875722978993e-05, - "loss": 3.1768, - "step": 47705 - }, - { - "epoch": 0.83, - "learning_rate": 4.462557793330186e-05, - "loss": 3.1154, - "step": 47710 - }, - { - "epoch": 0.83, - "learning_rate": 4.458241783966153e-05, - "loss": 3.1822, - "step": 47715 - }, - { - "epoch": 0.83, - "learning_rate": 4.4539276952114354e-05, - "loss": 3.1468, - "step": 47720 - }, - { - "epoch": 0.83, - "learning_rate": 4.4496155273904434e-05, - "loss": 3.1365, - "step": 47725 - }, - { - "epoch": 0.83, - "learning_rate": 4.44530528082741e-05, - "loss": 3.1987, - "step": 47730 - }, - { - "epoch": 0.83, - "learning_rate": 4.4409969558464575e-05, - "loss": 3.1955, - "step": 47735 - }, - { - "epoch": 0.83, - "learning_rate": 4.436690552771549e-05, - "loss": 3.1044, - "step": 47740 - }, - { - "epoch": 0.83, - "learning_rate": 4.432386071926493e-05, - "loss": 3.1316, - "step": 47745 - }, - { - "epoch": 0.83, - "learning_rate": 4.428083513634979e-05, - "loss": 3.1024, - "step": 47750 - }, - { - "epoch": 0.83, - "learning_rate": 4.4237828782205296e-05, - "loss": 3.1831, - "step": 47755 - }, - { - "epoch": 0.83, - "learning_rate": 4.419484166006533e-05, - "loss": 3.103, - "step": 47760 - }, - { - "epoch": 0.83, - "learning_rate": 4.4151873773162295e-05, - "loss": 3.0656, - "step": 47765 - }, - { - "epoch": 0.83, - "learning_rate": 4.410892512472708e-05, - "loss": 3.1923, - "step": 47770 - }, - { - "epoch": 0.83, - "learning_rate": 4.4065995717989335e-05, - "loss": 3.1408, - "step": 47775 - }, - { - "epoch": 0.83, - "learning_rate": 4.402308555617714e-05, - "loss": 3.2423, - "step": 47780 - }, - { - "epoch": 0.83, - "learning_rate": 4.3980194642516916e-05, - "loss": 3.075, - "step": 47785 - }, - { - "epoch": 0.83, - "learning_rate": 4.3937322980234025e-05, - "loss": 3.1385, - "step": 47790 - }, - { - "epoch": 0.83, - "learning_rate": 4.3894470572552174e-05, - "loss": 3.2144, - "step": 47795 - }, - { - "epoch": 0.83, - "learning_rate": 4.385163742269353e-05, - "loss": 3.1443, - "step": 47800 - }, - { - "epoch": 0.83, - "learning_rate": 4.380882353387918e-05, - "loss": 3.2049, - "step": 47805 - }, - { - "epoch": 0.83, - "learning_rate": 4.376602890932818e-05, - "loss": 3.1935, - "step": 47810 - }, - { - "epoch": 0.83, - "learning_rate": 4.3723253552258685e-05, - "loss": 3.1849, - "step": 47815 - }, - { - "epoch": 0.83, - "learning_rate": 4.368049746588713e-05, - "loss": 3.1479, - "step": 47820 - }, - { - "epoch": 0.83, - "learning_rate": 4.363776065342851e-05, - "loss": 3.0333, - "step": 47825 - }, - { - "epoch": 0.83, - "learning_rate": 4.359504311809655e-05, - "loss": 3.1875, - "step": 47830 - }, - { - "epoch": 0.83, - "learning_rate": 4.355234486310318e-05, - "loss": 3.0856, - "step": 47835 - }, - { - "epoch": 0.83, - "learning_rate": 4.350966589165928e-05, - "loss": 3.1907, - "step": 47840 - }, - { - "epoch": 0.83, - "learning_rate": 4.3467006206973983e-05, - "loss": 3.1594, - "step": 47845 - }, - { - "epoch": 0.83, - "learning_rate": 4.3424365812255057e-05, - "loss": 3.2084, - "step": 47850 - }, - { - "epoch": 0.83, - "learning_rate": 4.3381744710709024e-05, - "loss": 3.1532, - "step": 47855 - }, - { - "epoch": 0.83, - "learning_rate": 4.333914290554058e-05, - "loss": 3.1295, - "step": 47860 - }, - { - "epoch": 0.83, - "learning_rate": 4.3296560399953174e-05, - "loss": 3.029, - "step": 47865 - }, - { - "epoch": 0.83, - "learning_rate": 4.325399719714887e-05, - "loss": 3.2236, - "step": 47870 - }, - { - "epoch": 0.83, - "learning_rate": 4.321145330032818e-05, - "loss": 3.1809, - "step": 47875 - }, - { - "epoch": 0.83, - "learning_rate": 4.316892871269021e-05, - "loss": 3.1428, - "step": 47880 - }, - { - "epoch": 0.83, - "learning_rate": 4.312642343743253e-05, - "loss": 3.0379, - "step": 47885 - }, - { - "epoch": 0.83, - "learning_rate": 4.3083937477751326e-05, - "loss": 3.1767, - "step": 47890 - }, - { - "epoch": 0.83, - "learning_rate": 4.3041470836841375e-05, - "loss": 3.111, - "step": 47895 - }, - { - "epoch": 0.83, - "learning_rate": 4.299902351789595e-05, - "loss": 3.1206, - "step": 47900 - }, - { - "epoch": 0.83, - "learning_rate": 4.295659552410683e-05, - "loss": 3.1395, - "step": 47905 - }, - { - "epoch": 0.83, - "learning_rate": 4.29141868586644e-05, - "loss": 3.0333, - "step": 47910 - }, - { - "epoch": 0.83, - "learning_rate": 4.287179752475755e-05, - "loss": 3.1619, - "step": 47915 - }, - { - "epoch": 0.83, - "learning_rate": 4.282942752557378e-05, - "loss": 3.1564, - "step": 47920 - }, - { - "epoch": 0.83, - "learning_rate": 4.278707686429909e-05, - "loss": 3.0934, - "step": 47925 - }, - { - "epoch": 0.83, - "learning_rate": 4.2744745544118055e-05, - "loss": 3.1424, - "step": 47930 - }, - { - "epoch": 0.83, - "learning_rate": 4.27024335682137e-05, - "loss": 3.095, - "step": 47935 - }, - { - "epoch": 0.83, - "learning_rate": 4.266014093976775e-05, - "loss": 3.1656, - "step": 47940 - }, - { - "epoch": 0.83, - "learning_rate": 4.261786766196026e-05, - "loss": 3.1747, - "step": 47945 - }, - { - "epoch": 0.83, - "learning_rate": 4.257561373797015e-05, - "loss": 3.1346, - "step": 47950 - }, - { - "epoch": 0.83, - "learning_rate": 4.253337917097458e-05, - "loss": 3.1829, - "step": 47955 - }, - { - "epoch": 0.83, - "learning_rate": 4.249116396414941e-05, - "loss": 3.2223, - "step": 47960 - }, - { - "epoch": 0.83, - "learning_rate": 4.244896812066904e-05, - "loss": 3.1598, - "step": 47965 - }, - { - "epoch": 0.83, - "learning_rate": 4.2406791643706216e-05, - "loss": 3.1704, - "step": 47970 - }, - { - "epoch": 0.83, - "learning_rate": 4.236463453643262e-05, - "loss": 3.1106, - "step": 47975 - }, - { - "epoch": 0.83, - "learning_rate": 4.23224968020181e-05, - "loss": 3.2074, - "step": 47980 - }, - { - "epoch": 0.83, - "learning_rate": 4.22803784436313e-05, - "loss": 3.0668, - "step": 47985 - }, - { - "epoch": 0.83, - "learning_rate": 4.223827946443922e-05, - "loss": 3.1491, - "step": 47990 - }, - { - "epoch": 0.83, - "learning_rate": 4.219619986760745e-05, - "loss": 3.1812, - "step": 47995 - }, - { - "epoch": 0.83, - "learning_rate": 4.21541396563003e-05, - "loss": 3.0527, - "step": 48000 - }, - { - "epoch": 0.83, - "eval_loss": 3.1674163341522217, - "eval_runtime": 150.7672, - "eval_samples_per_second": 12.211, - "eval_steps_per_second": 0.769, - "step": 48000 - }, - { - "epoch": 0.83, - "learning_rate": 4.211209883368041e-05, - "loss": 3.0893, - "step": 48005 - }, - { - "epoch": 0.83, - "learning_rate": 4.207007740290901e-05, - "loss": 3.29, - "step": 48010 - }, - { - "epoch": 0.83, - "learning_rate": 4.202807536714594e-05, - "loss": 3.1224, - "step": 48015 - }, - { - "epoch": 0.83, - "learning_rate": 4.198609272954952e-05, - "loss": 3.1394, - "step": 48020 - }, - { - "epoch": 0.83, - "learning_rate": 4.1944129493276565e-05, - "loss": 3.2085, - "step": 48025 - }, - { - "epoch": 0.83, - "learning_rate": 4.1902185661482614e-05, - "loss": 3.1503, - "step": 48030 - }, - { - "epoch": 0.83, - "learning_rate": 4.186026123732156e-05, - "loss": 3.1939, - "step": 48035 - }, - { - "epoch": 0.83, - "learning_rate": 4.1818356223945915e-05, - "loss": 3.1623, - "step": 48040 - }, - { - "epoch": 0.83, - "learning_rate": 4.177647062450677e-05, - "loss": 3.0938, - "step": 48045 - }, - { - "epoch": 0.83, - "learning_rate": 4.173460444215352e-05, - "loss": 3.0736, - "step": 48050 - }, - { - "epoch": 0.83, - "learning_rate": 4.169275768003456e-05, - "loss": 3.2231, - "step": 48055 - }, - { - "epoch": 0.83, - "learning_rate": 4.1650930341296375e-05, - "loss": 3.074, - "step": 48060 - }, - { - "epoch": 0.83, - "learning_rate": 4.1609122429084205e-05, - "loss": 3.198, - "step": 48065 - }, - { - "epoch": 0.83, - "learning_rate": 4.156733394654185e-05, - "loss": 3.1164, - "step": 48070 - }, - { - "epoch": 0.83, - "learning_rate": 4.1525564896811416e-05, - "loss": 3.0952, - "step": 48075 - }, - { - "epoch": 0.83, - "learning_rate": 4.148381528303393e-05, - "loss": 3.1842, - "step": 48080 - }, - { - "epoch": 0.83, - "learning_rate": 4.144208510834871e-05, - "loss": 3.1148, - "step": 48085 - }, - { - "epoch": 0.83, - "learning_rate": 4.140037437589348e-05, - "loss": 3.194, - "step": 48090 - }, - { - "epoch": 0.83, - "learning_rate": 4.135868308880488e-05, - "loss": 3.2252, - "step": 48095 - }, - { - "epoch": 0.83, - "learning_rate": 4.131701125021778e-05, - "loss": 3.1747, - "step": 48100 - }, - { - "epoch": 0.83, - "learning_rate": 4.1275358863265615e-05, - "loss": 3.0487, - "step": 48105 - }, - { - "epoch": 0.83, - "learning_rate": 4.1233725931080686e-05, - "loss": 3.0774, - "step": 48110 - }, - { - "epoch": 0.83, - "learning_rate": 4.119211245679328e-05, - "loss": 3.1601, - "step": 48115 - }, - { - "epoch": 0.83, - "learning_rate": 4.11505184435327e-05, - "loss": 3.1999, - "step": 48120 - }, - { - "epoch": 0.83, - "learning_rate": 4.110894389442654e-05, - "loss": 3.1054, - "step": 48125 - }, - { - "epoch": 0.83, - "learning_rate": 4.106738881260093e-05, - "loss": 3.1546, - "step": 48130 - }, - { - "epoch": 0.83, - "learning_rate": 4.102585320118085e-05, - "loss": 3.1418, - "step": 48135 - }, - { - "epoch": 0.83, - "learning_rate": 4.098433706328922e-05, - "loss": 3.1572, - "step": 48140 - }, - { - "epoch": 0.83, - "learning_rate": 4.0942840402048085e-05, - "loss": 3.0344, - "step": 48145 - }, - { - "epoch": 0.83, - "learning_rate": 4.090136322057771e-05, - "loss": 3.1375, - "step": 48150 - }, - { - "epoch": 0.83, - "learning_rate": 4.085990552199687e-05, - "loss": 3.1835, - "step": 48155 - }, - { - "epoch": 0.83, - "learning_rate": 4.081846730942324e-05, - "loss": 3.1463, - "step": 48160 - }, - { - "epoch": 0.83, - "learning_rate": 4.077704858597249e-05, - "loss": 3.0912, - "step": 48165 - }, - { - "epoch": 0.83, - "learning_rate": 4.07356493547591e-05, - "loss": 3.2175, - "step": 48170 - }, - { - "epoch": 0.83, - "learning_rate": 4.069426961889628e-05, - "loss": 3.1519, - "step": 48175 - }, - { - "epoch": 0.83, - "learning_rate": 4.0652909381495436e-05, - "loss": 3.1169, - "step": 48180 - }, - { - "epoch": 0.83, - "learning_rate": 4.061156864566668e-05, - "loss": 3.1638, - "step": 48185 - }, - { - "epoch": 0.83, - "learning_rate": 4.057024741451863e-05, - "loss": 3.1834, - "step": 48190 - }, - { - "epoch": 0.83, - "learning_rate": 4.052894569115833e-05, - "loss": 3.1308, - "step": 48195 - }, - { - "epoch": 0.83, - "learning_rate": 4.048766347869163e-05, - "loss": 3.097, - "step": 48200 - }, - { - "epoch": 0.83, - "learning_rate": 4.0446400780222654e-05, - "loss": 3.1599, - "step": 48205 - }, - { - "epoch": 0.83, - "learning_rate": 4.0405157598854076e-05, - "loss": 3.129, - "step": 48210 - }, - { - "epoch": 0.83, - "learning_rate": 4.0363933937687386e-05, - "loss": 3.2136, - "step": 48215 - }, - { - "epoch": 0.83, - "learning_rate": 4.032272979982211e-05, - "loss": 3.156, - "step": 48220 - }, - { - "epoch": 0.83, - "learning_rate": 4.028154518835679e-05, - "loss": 3.112, - "step": 48225 - }, - { - "epoch": 0.83, - "learning_rate": 4.024038010638826e-05, - "loss": 3.1369, - "step": 48230 - }, - { - "epoch": 0.83, - "learning_rate": 4.0199234557011806e-05, - "loss": 3.2179, - "step": 48235 - }, - { - "epoch": 0.83, - "learning_rate": 4.0158108543321586e-05, - "loss": 3.2037, - "step": 48240 - }, - { - "epoch": 0.83, - "learning_rate": 4.01170020684099e-05, - "loss": 3.1935, - "step": 48245 - }, - { - "epoch": 0.84, - "learning_rate": 4.00759151353677e-05, - "loss": 3.1416, - "step": 48250 - }, - { - "epoch": 0.84, - "learning_rate": 4.003484774728465e-05, - "loss": 3.0572, - "step": 48255 - }, - { - "epoch": 0.84, - "learning_rate": 3.999379990724875e-05, - "loss": 3.2272, - "step": 48260 - }, - { - "epoch": 0.84, - "learning_rate": 3.9952771618346556e-05, - "loss": 2.9975, - "step": 48265 - }, - { - "epoch": 0.84, - "learning_rate": 3.9911762883663276e-05, - "loss": 3.0674, - "step": 48270 - }, - { - "epoch": 0.84, - "learning_rate": 3.9870773706282374e-05, - "loss": 3.0913, - "step": 48275 - }, - { - "epoch": 0.84, - "learning_rate": 3.982980408928625e-05, - "loss": 3.172, - "step": 48280 - }, - { - "epoch": 0.84, - "learning_rate": 3.978885403575551e-05, - "loss": 3.1857, - "step": 48285 - }, - { - "epoch": 0.84, - "learning_rate": 3.974792354876936e-05, - "loss": 3.0375, - "step": 48290 - }, - { - "epoch": 0.84, - "learning_rate": 3.970701263140559e-05, - "loss": 3.0507, - "step": 48295 - }, - { - "epoch": 0.84, - "learning_rate": 3.9666121286740514e-05, - "loss": 3.0858, - "step": 48300 - }, - { - "epoch": 0.84, - "learning_rate": 3.962524951784885e-05, - "loss": 3.1223, - "step": 48305 - }, - { - "epoch": 0.84, - "learning_rate": 3.958439732780409e-05, - "loss": 3.2392, - "step": 48310 - }, - { - "epoch": 0.84, - "learning_rate": 3.954356471967805e-05, - "loss": 3.0487, - "step": 48315 - }, - { - "epoch": 0.84, - "learning_rate": 3.95027516965411e-05, - "loss": 3.192, - "step": 48320 - }, - { - "epoch": 0.84, - "learning_rate": 3.94619582614622e-05, - "loss": 3.1445, - "step": 48325 - }, - { - "epoch": 0.84, - "learning_rate": 3.942118441750876e-05, - "loss": 3.2535, - "step": 48330 - }, - { - "epoch": 0.84, - "learning_rate": 3.938043016774685e-05, - "loss": 3.0691, - "step": 48335 - }, - { - "epoch": 0.84, - "learning_rate": 3.933969551524095e-05, - "loss": 3.1177, - "step": 48340 - }, - { - "epoch": 0.84, - "learning_rate": 3.9298980463054064e-05, - "loss": 3.1912, - "step": 48345 - }, - { - "epoch": 0.84, - "learning_rate": 3.9258285014247794e-05, - "loss": 3.0325, - "step": 48350 - }, - { - "epoch": 0.84, - "learning_rate": 3.9217609171882155e-05, - "loss": 3.1963, - "step": 48355 - }, - { - "epoch": 0.84, - "learning_rate": 3.917695293901585e-05, - "loss": 3.0921, - "step": 48360 - }, - { - "epoch": 0.84, - "learning_rate": 3.9136316318706084e-05, - "loss": 3.2268, - "step": 48365 - }, - { - "epoch": 0.84, - "learning_rate": 3.909569931400829e-05, - "loss": 3.1666, - "step": 48370 - }, - { - "epoch": 0.84, - "learning_rate": 3.905510192797685e-05, - "loss": 3.1087, - "step": 48375 - }, - { - "epoch": 0.84, - "learning_rate": 3.901452416366443e-05, - "loss": 3.1702, - "step": 48380 - }, - { - "epoch": 0.84, - "learning_rate": 3.897396602412218e-05, - "loss": 3.1439, - "step": 48385 - }, - { - "epoch": 0.84, - "learning_rate": 3.893342751240011e-05, - "loss": 3.1748, - "step": 48390 - }, - { - "epoch": 0.84, - "learning_rate": 3.889290863154619e-05, - "loss": 3.139, - "step": 48395 - }, - { - "epoch": 0.84, - "learning_rate": 3.885240938460747e-05, - "loss": 3.016, - "step": 48400 - }, - { - "epoch": 0.84, - "learning_rate": 3.881192977462918e-05, - "loss": 3.1218, - "step": 48405 - }, - { - "epoch": 0.84, - "learning_rate": 3.877146980465517e-05, - "loss": 3.0636, - "step": 48410 - }, - { - "epoch": 0.84, - "learning_rate": 3.8731029477727956e-05, - "loss": 3.0979, - "step": 48415 - }, - { - "epoch": 0.84, - "learning_rate": 3.869060879688823e-05, - "loss": 3.0291, - "step": 48420 - }, - { - "epoch": 0.84, - "learning_rate": 3.86502077651756e-05, - "loss": 3.1511, - "step": 48425 - }, - { - "epoch": 0.84, - "learning_rate": 3.860982638562796e-05, - "loss": 3.1245, - "step": 48430 - }, - { - "epoch": 0.84, - "learning_rate": 3.856946466128169e-05, - "loss": 3.2536, - "step": 48435 - }, - { - "epoch": 0.84, - "learning_rate": 3.8529122595171926e-05, - "loss": 3.0808, - "step": 48440 - }, - { - "epoch": 0.84, - "learning_rate": 3.84888001903322e-05, - "loss": 3.188, - "step": 48445 - }, - { - "epoch": 0.84, - "learning_rate": 3.844849744979434e-05, - "loss": 3.1776, - "step": 48450 - }, - { - "epoch": 0.84, - "learning_rate": 3.840821437658913e-05, - "loss": 3.0985, - "step": 48455 - }, - { - "epoch": 0.84, - "learning_rate": 3.836795097374553e-05, - "loss": 3.1585, - "step": 48460 - }, - { - "epoch": 0.84, - "learning_rate": 3.832770724429112e-05, - "loss": 3.0258, - "step": 48465 - }, - { - "epoch": 0.84, - "learning_rate": 3.8287483191252236e-05, - "loss": 3.137, - "step": 48470 - }, - { - "epoch": 0.84, - "learning_rate": 3.824727881765319e-05, - "loss": 3.0862, - "step": 48475 - }, - { - "epoch": 0.84, - "learning_rate": 3.820709412651738e-05, - "loss": 3.1149, - "step": 48480 - }, - { - "epoch": 0.84, - "learning_rate": 3.816692912086644e-05, - "loss": 3.0442, - "step": 48485 - }, - { - "epoch": 0.84, - "learning_rate": 3.8126783803720496e-05, - "loss": 3.1947, - "step": 48490 - }, - { - "epoch": 0.84, - "learning_rate": 3.808665817809846e-05, - "loss": 3.2723, - "step": 48495 - }, - { - "epoch": 0.84, - "learning_rate": 3.804655224701732e-05, - "loss": 3.2255, - "step": 48500 - }, - { - "epoch": 0.84, - "eval_loss": 3.165621519088745, - "eval_runtime": 150.6748, - "eval_samples_per_second": 12.218, - "eval_steps_per_second": 0.77, - "step": 48500 - }, - { - "epoch": 0.84, - "learning_rate": 3.800646601349301e-05, - "loss": 3.072, - "step": 48505 - }, - { - "epoch": 0.84, - "learning_rate": 3.796639948053977e-05, - "loss": 3.1245, - "step": 48510 - }, - { - "epoch": 0.84, - "learning_rate": 3.792635265117032e-05, - "loss": 2.9963, - "step": 48515 - }, - { - "epoch": 0.84, - "learning_rate": 3.7886325528396166e-05, - "loss": 3.1519, - "step": 48520 - }, - { - "epoch": 0.84, - "learning_rate": 3.784631811522696e-05, - "loss": 3.2753, - "step": 48525 - }, - { - "epoch": 0.84, - "learning_rate": 3.780633041467106e-05, - "loss": 2.9697, - "step": 48530 - }, - { - "epoch": 0.84, - "learning_rate": 3.7766362429735465e-05, - "loss": 3.0918, - "step": 48535 - }, - { - "epoch": 0.84, - "learning_rate": 3.772641416342547e-05, - "loss": 3.1775, - "step": 48540 - }, - { - "epoch": 0.84, - "learning_rate": 3.7686485618744996e-05, - "loss": 3.1495, - "step": 48545 - }, - { - "epoch": 0.84, - "learning_rate": 3.764657679869646e-05, - "loss": 3.1519, - "step": 48550 - }, - { - "epoch": 0.84, - "learning_rate": 3.760668770628078e-05, - "loss": 3.2319, - "step": 48555 - }, - { - "epoch": 0.84, - "learning_rate": 3.7566818344497465e-05, - "loss": 3.1863, - "step": 48560 - }, - { - "epoch": 0.84, - "learning_rate": 3.7526968716344506e-05, - "loss": 3.1556, - "step": 48565 - }, - { - "epoch": 0.84, - "learning_rate": 3.748713882481832e-05, - "loss": 3.165, - "step": 48570 - }, - { - "epoch": 0.84, - "learning_rate": 3.7447328672913934e-05, - "loss": 3.2209, - "step": 48575 - }, - { - "epoch": 0.84, - "learning_rate": 3.740753826362479e-05, - "loss": 3.1098, - "step": 48580 - }, - { - "epoch": 0.84, - "learning_rate": 3.73677675999431e-05, - "loss": 3.0798, - "step": 48585 - }, - { - "epoch": 0.84, - "learning_rate": 3.73280166848593e-05, - "loss": 3.1605, - "step": 48590 - }, - { - "epoch": 0.84, - "learning_rate": 3.728828552136249e-05, - "loss": 3.1955, - "step": 48595 - }, - { - "epoch": 0.84, - "learning_rate": 3.724857411244023e-05, - "loss": 3.1925, - "step": 48600 - }, - { - "epoch": 0.84, - "learning_rate": 3.720888246107859e-05, - "loss": 3.1132, - "step": 48605 - }, - { - "epoch": 0.84, - "learning_rate": 3.7169210570262184e-05, - "loss": 3.1987, - "step": 48610 - }, - { - "epoch": 0.84, - "learning_rate": 3.712955844297419e-05, - "loss": 3.0665, - "step": 48615 - }, - { - "epoch": 0.84, - "learning_rate": 3.708992608219623e-05, - "loss": 3.0131, - "step": 48620 - }, - { - "epoch": 0.84, - "learning_rate": 3.705031349090841e-05, - "loss": 3.1801, - "step": 48625 - }, - { - "epoch": 0.84, - "learning_rate": 3.701072067208942e-05, - "loss": 3.1396, - "step": 48630 - }, - { - "epoch": 0.84, - "learning_rate": 3.6971147628716417e-05, - "loss": 3.1894, - "step": 48635 - }, - { - "epoch": 0.84, - "learning_rate": 3.693159436376514e-05, - "loss": 3.1054, - "step": 48640 - }, - { - "epoch": 0.84, - "learning_rate": 3.689206088020975e-05, - "loss": 3.1274, - "step": 48645 - }, - { - "epoch": 0.84, - "learning_rate": 3.6852547181023004e-05, - "loss": 3.108, - "step": 48650 - }, - { - "epoch": 0.84, - "learning_rate": 3.681305326917609e-05, - "loss": 3.1307, - "step": 48655 - }, - { - "epoch": 0.84, - "learning_rate": 3.677357914763871e-05, - "loss": 3.214, - "step": 48660 - }, - { - "epoch": 0.84, - "learning_rate": 3.673412481937921e-05, - "loss": 3.0962, - "step": 48665 - }, - { - "epoch": 0.84, - "learning_rate": 3.669469028736428e-05, - "loss": 3.203, - "step": 48670 - }, - { - "epoch": 0.84, - "learning_rate": 3.665527555455926e-05, - "loss": 3.1129, - "step": 48675 - }, - { - "epoch": 0.84, - "learning_rate": 3.6615880623927906e-05, - "loss": 3.114, - "step": 48680 - }, - { - "epoch": 0.84, - "learning_rate": 3.6576505498432484e-05, - "loss": 3.0682, - "step": 48685 - }, - { - "epoch": 0.84, - "learning_rate": 3.653715018103376e-05, - "loss": 3.1253, - "step": 48690 - }, - { - "epoch": 0.84, - "learning_rate": 3.649781467469119e-05, - "loss": 3.1612, - "step": 48695 - }, - { - "epoch": 0.84, - "learning_rate": 3.6458498982362525e-05, - "loss": 3.1239, - "step": 48700 - }, - { - "epoch": 0.84, - "learning_rate": 3.641920310700413e-05, - "loss": 3.0634, - "step": 48705 - }, - { - "epoch": 0.84, - "learning_rate": 3.637992705157079e-05, - "loss": 3.1489, - "step": 48710 - }, - { - "epoch": 0.84, - "learning_rate": 3.634067081901586e-05, - "loss": 3.0635, - "step": 48715 - }, - { - "epoch": 0.84, - "learning_rate": 3.6301434412291306e-05, - "loss": 3.0972, - "step": 48720 - }, - { - "epoch": 0.84, - "learning_rate": 3.6262217834347477e-05, - "loss": 3.0815, - "step": 48725 - }, - { - "epoch": 0.84, - "learning_rate": 3.622302108813323e-05, - "loss": 3.1713, - "step": 48730 - }, - { - "epoch": 0.84, - "learning_rate": 3.6183844176595925e-05, - "loss": 3.1575, - "step": 48735 - }, - { - "epoch": 0.84, - "learning_rate": 3.614468710268146e-05, - "loss": 3.118, - "step": 48740 - }, - { - "epoch": 0.84, - "learning_rate": 3.610554986933435e-05, - "loss": 3.0587, - "step": 48745 - }, - { - "epoch": 0.84, - "learning_rate": 3.6066432479497543e-05, - "loss": 3.0999, - "step": 48750 - }, - { - "epoch": 0.84, - "learning_rate": 3.602733493611222e-05, - "loss": 3.0805, - "step": 48755 - }, - { - "epoch": 0.84, - "learning_rate": 3.5988257242118525e-05, - "loss": 3.0945, - "step": 48760 - }, - { - "epoch": 0.84, - "learning_rate": 3.594919940045488e-05, - "loss": 3.0203, - "step": 48765 - }, - { - "epoch": 0.84, - "learning_rate": 3.591016141405809e-05, - "loss": 3.1371, - "step": 48770 - }, - { - "epoch": 0.84, - "learning_rate": 3.587114328586389e-05, - "loss": 3.093, - "step": 48775 - }, - { - "epoch": 0.84, - "learning_rate": 3.583214501880592e-05, - "loss": 3.1092, - "step": 48780 - }, - { - "epoch": 0.84, - "learning_rate": 3.5793166615816915e-05, - "loss": 3.1984, - "step": 48785 - }, - { - "epoch": 0.84, - "learning_rate": 3.5754208079827684e-05, - "loss": 3.1765, - "step": 48790 - }, - { - "epoch": 0.84, - "learning_rate": 3.5715269413767757e-05, - "loss": 3.1786, - "step": 48795 - }, - { - "epoch": 0.84, - "learning_rate": 3.567635062056522e-05, - "loss": 3.1385, - "step": 48800 - }, - { - "epoch": 0.84, - "learning_rate": 3.5637451703146435e-05, - "loss": 3.1255, - "step": 48805 - }, - { - "epoch": 0.84, - "learning_rate": 3.5598572664436366e-05, - "loss": 3.1508, - "step": 48810 - }, - { - "epoch": 0.84, - "learning_rate": 3.555971350735867e-05, - "loss": 3.1419, - "step": 48815 - }, - { - "epoch": 0.84, - "learning_rate": 3.552087423483531e-05, - "loss": 3.1746, - "step": 48820 - }, - { - "epoch": 0.84, - "learning_rate": 3.5482054849786744e-05, - "loss": 3.083, - "step": 48825 - }, - { - "epoch": 0.85, - "learning_rate": 3.544325535513202e-05, - "loss": 3.0522, - "step": 48830 - }, - { - "epoch": 0.85, - "learning_rate": 3.540447575378859e-05, - "loss": 3.0806, - "step": 48835 - }, - { - "epoch": 0.85, - "learning_rate": 3.5365716048672624e-05, - "loss": 3.0787, - "step": 48840 - }, - { - "epoch": 0.85, - "learning_rate": 3.532697624269859e-05, - "loss": 3.1798, - "step": 48845 - }, - { - "epoch": 0.85, - "learning_rate": 3.528825633877953e-05, - "loss": 3.1439, - "step": 48850 - }, - { - "epoch": 0.85, - "learning_rate": 3.5249556339826944e-05, - "loss": 3.0276, - "step": 48855 - }, - { - "epoch": 0.85, - "learning_rate": 3.521087624875084e-05, - "loss": 3.1471, - "step": 48860 - }, - { - "epoch": 0.85, - "learning_rate": 3.517221606845986e-05, - "loss": 3.1667, - "step": 48865 - }, - { - "epoch": 0.85, - "learning_rate": 3.5133575801861036e-05, - "loss": 3.2375, - "step": 48870 - }, - { - "epoch": 0.85, - "learning_rate": 3.509495545185988e-05, - "loss": 3.1099, - "step": 48875 - }, - { - "epoch": 0.85, - "learning_rate": 3.5056355021360486e-05, - "loss": 3.2321, - "step": 48880 - }, - { - "epoch": 0.85, - "learning_rate": 3.501777451326534e-05, - "loss": 3.1319, - "step": 48885 - }, - { - "epoch": 0.85, - "learning_rate": 3.4979213930475506e-05, - "loss": 3.0698, - "step": 48890 - }, - { - "epoch": 0.85, - "learning_rate": 3.494067327589062e-05, - "loss": 3.2112, - "step": 48895 - }, - { - "epoch": 0.85, - "learning_rate": 3.49021525524087e-05, - "loss": 3.1327, - "step": 48900 - }, - { - "epoch": 0.85, - "learning_rate": 3.4863651762926305e-05, - "loss": 3.1745, - "step": 48905 - }, - { - "epoch": 0.85, - "learning_rate": 3.482517091033851e-05, - "loss": 3.1674, - "step": 48910 - }, - { - "epoch": 0.85, - "learning_rate": 3.4786709997538764e-05, - "loss": 3.1281, - "step": 48915 - }, - { - "epoch": 0.85, - "learning_rate": 3.4748269027419326e-05, - "loss": 3.1037, - "step": 48920 - }, - { - "epoch": 0.85, - "learning_rate": 3.470984800287066e-05, - "loss": 3.0013, - "step": 48925 - }, - { - "epoch": 0.85, - "learning_rate": 3.46714469267818e-05, - "loss": 3.0937, - "step": 48930 - }, - { - "epoch": 0.85, - "learning_rate": 3.463306580204038e-05, - "loss": 3.1225, - "step": 48935 - }, - { - "epoch": 0.85, - "learning_rate": 3.459470463153236e-05, - "loss": 3.1773, - "step": 48940 - }, - { - "epoch": 0.85, - "learning_rate": 3.455636341814241e-05, - "loss": 3.1932, - "step": 48945 - }, - { - "epoch": 0.85, - "learning_rate": 3.45180421647536e-05, - "loss": 3.0965, - "step": 48950 - }, - { - "epoch": 0.85, - "learning_rate": 3.447974087424741e-05, - "loss": 3.0238, - "step": 48955 - }, - { - "epoch": 0.85, - "learning_rate": 3.4441459549503924e-05, - "loss": 3.1729, - "step": 48960 - }, - { - "epoch": 0.85, - "learning_rate": 3.440319819340175e-05, - "loss": 3.0342, - "step": 48965 - }, - { - "epoch": 0.85, - "learning_rate": 3.436495680881781e-05, - "loss": 3.0191, - "step": 48970 - }, - { - "epoch": 0.85, - "learning_rate": 3.432673539862785e-05, - "loss": 3.2222, - "step": 48975 - }, - { - "epoch": 0.85, - "learning_rate": 3.428853396570582e-05, - "loss": 3.2052, - "step": 48980 - }, - { - "epoch": 0.85, - "learning_rate": 3.4250352512924307e-05, - "loss": 3.0527, - "step": 48985 - }, - { - "epoch": 0.85, - "learning_rate": 3.4212191043154336e-05, - "loss": 3.2075, - "step": 48990 - }, - { - "epoch": 0.85, - "learning_rate": 3.4174049559265385e-05, - "loss": 3.0892, - "step": 48995 - }, - { - "epoch": 0.85, - "learning_rate": 3.4135928064125615e-05, - "loss": 3.1558, - "step": 49000 - }, - { - "epoch": 0.85, - "eval_loss": 3.1634976863861084, - "eval_runtime": 151.0713, - "eval_samples_per_second": 12.186, - "eval_steps_per_second": 0.768, - "step": 49000 - }, - { - "epoch": 0.85, - "learning_rate": 3.409782656060155e-05, - "loss": 3.149, - "step": 49005 - }, - { - "epoch": 0.85, - "learning_rate": 3.405974505155818e-05, - "loss": 3.1727, - "step": 49010 - }, - { - "epoch": 0.85, - "learning_rate": 3.402168353985907e-05, - "loss": 3.1543, - "step": 49015 - }, - { - "epoch": 0.85, - "learning_rate": 3.398364202836621e-05, - "loss": 3.1762, - "step": 49020 - }, - { - "epoch": 0.85, - "learning_rate": 3.394562051994019e-05, - "loss": 3.1644, - "step": 49025 - }, - { - "epoch": 0.85, - "learning_rate": 3.390761901744005e-05, - "loss": 3.0792, - "step": 49030 - }, - { - "epoch": 0.85, - "learning_rate": 3.3869637523723134e-05, - "loss": 3.1231, - "step": 49035 - }, - { - "epoch": 0.85, - "learning_rate": 3.3831676041645674e-05, - "loss": 3.1345, - "step": 49040 - }, - { - "epoch": 0.85, - "learning_rate": 3.379373457406204e-05, - "loss": 3.096, - "step": 49045 - }, - { - "epoch": 0.85, - "learning_rate": 3.375581312382525e-05, - "loss": 3.1195, - "step": 49050 - }, - { - "epoch": 0.85, - "learning_rate": 3.371791169378696e-05, - "loss": 3.1191, - "step": 49055 - }, - { - "epoch": 0.85, - "learning_rate": 3.368003028679689e-05, - "loss": 3.0668, - "step": 49060 - }, - { - "epoch": 0.85, - "learning_rate": 3.364216890570376e-05, - "loss": 3.0021, - "step": 49065 - }, - { - "epoch": 0.85, - "learning_rate": 3.360432755335446e-05, - "loss": 3.0204, - "step": 49070 - }, - { - "epoch": 0.85, - "learning_rate": 3.356650623259438e-05, - "loss": 3.0765, - "step": 49075 - }, - { - "epoch": 0.85, - "learning_rate": 3.352870494626775e-05, - "loss": 3.0809, - "step": 49080 - }, - { - "epoch": 0.85, - "learning_rate": 3.349092369721672e-05, - "loss": 3.1173, - "step": 49085 - }, - { - "epoch": 0.85, - "learning_rate": 3.3453162488282425e-05, - "loss": 3.207, - "step": 49090 - }, - { - "epoch": 0.85, - "learning_rate": 3.3415421322304327e-05, - "loss": 3.1119, - "step": 49095 - }, - { - "epoch": 0.85, - "learning_rate": 3.337770020212025e-05, - "loss": 3.188, - "step": 49100 - }, - { - "epoch": 0.85, - "learning_rate": 3.3339999130566795e-05, - "loss": 3.1872, - "step": 49105 - }, - { - "epoch": 0.85, - "learning_rate": 3.330231811047876e-05, - "loss": 3.1816, - "step": 49110 - }, - { - "epoch": 0.85, - "learning_rate": 3.326465714468952e-05, - "loss": 3.0738, - "step": 49115 - }, - { - "epoch": 0.85, - "learning_rate": 3.3227016236031134e-05, - "loss": 3.0493, - "step": 49120 - }, - { - "epoch": 0.85, - "learning_rate": 3.318939538733394e-05, - "loss": 3.1476, - "step": 49125 - }, - { - "epoch": 0.85, - "learning_rate": 3.315179460142677e-05, - "loss": 3.2832, - "step": 49130 - }, - { - "epoch": 0.85, - "learning_rate": 3.3114213881137175e-05, - "loss": 3.3015, - "step": 49135 - }, - { - "epoch": 0.85, - "learning_rate": 3.307665322929085e-05, - "loss": 3.2245, - "step": 49140 - }, - { - "epoch": 0.85, - "learning_rate": 3.303911264871227e-05, - "loss": 3.1334, - "step": 49145 - }, - { - "epoch": 0.85, - "learning_rate": 3.3001592142224256e-05, - "loss": 3.0606, - "step": 49150 - }, - { - "epoch": 0.85, - "learning_rate": 3.2964091712648135e-05, - "loss": 2.9972, - "step": 49155 - }, - { - "epoch": 0.85, - "learning_rate": 3.292661136280389e-05, - "loss": 3.1674, - "step": 49160 - }, - { - "epoch": 0.85, - "learning_rate": 3.28891510955096e-05, - "loss": 3.1367, - "step": 49165 - }, - { - "epoch": 0.85, - "learning_rate": 3.285171091358232e-05, - "loss": 3.2221, - "step": 49170 - }, - { - "epoch": 0.85, - "learning_rate": 3.2814290819837255e-05, - "loss": 3.2278, - "step": 49175 - }, - { - "epoch": 0.85, - "learning_rate": 3.277689081708815e-05, - "loss": 3.1154, - "step": 49180 - }, - { - "epoch": 0.85, - "learning_rate": 3.273951090814747e-05, - "loss": 3.0598, - "step": 49185 - }, - { - "epoch": 0.85, - "learning_rate": 3.2702151095825826e-05, - "loss": 3.1559, - "step": 49190 - }, - { - "epoch": 0.85, - "learning_rate": 3.26648113829325e-05, - "loss": 3.1684, - "step": 49195 - }, - { - "epoch": 0.85, - "learning_rate": 3.262749177227532e-05, - "loss": 3.1717, - "step": 49200 - }, - { - "epoch": 0.85, - "learning_rate": 3.25901922666605e-05, - "loss": 3.1332, - "step": 49205 - }, - { - "epoch": 0.85, - "learning_rate": 3.25529128688928e-05, - "loss": 3.2295, - "step": 49210 - }, - { - "epoch": 0.85, - "learning_rate": 3.251565358177538e-05, - "loss": 3.1071, - "step": 49215 - }, - { - "epoch": 0.85, - "learning_rate": 3.2478414408109895e-05, - "loss": 3.2373, - "step": 49220 - }, - { - "epoch": 0.85, - "learning_rate": 3.2441195350696716e-05, - "loss": 3.1978, - "step": 49225 - }, - { - "epoch": 0.85, - "learning_rate": 3.240399641233442e-05, - "loss": 3.1092, - "step": 49230 - }, - { - "epoch": 0.85, - "learning_rate": 3.236681759582018e-05, - "loss": 3.1095, - "step": 49235 - }, - { - "epoch": 0.85, - "learning_rate": 3.232965890394966e-05, - "loss": 3.0972, - "step": 49240 - }, - { - "epoch": 0.85, - "learning_rate": 3.229252033951696e-05, - "loss": 3.2005, - "step": 49245 - }, - { - "epoch": 0.85, - "learning_rate": 3.225540190531477e-05, - "loss": 3.1714, - "step": 49250 - }, - { - "epoch": 0.85, - "learning_rate": 3.221830360413423e-05, - "loss": 3.0591, - "step": 49255 - }, - { - "epoch": 0.85, - "learning_rate": 3.218122543876488e-05, - "loss": 3.0512, - "step": 49260 - }, - { - "epoch": 0.85, - "learning_rate": 3.214416741199481e-05, - "loss": 3.072, - "step": 49265 - }, - { - "epoch": 0.85, - "learning_rate": 3.2107129526610645e-05, - "loss": 3.1319, - "step": 49270 - }, - { - "epoch": 0.85, - "learning_rate": 3.2070111785397334e-05, - "loss": 3.0662, - "step": 49275 - }, - { - "epoch": 0.85, - "learning_rate": 3.203311419113856e-05, - "loss": 3.0886, - "step": 49280 - }, - { - "epoch": 0.85, - "learning_rate": 3.199613674661627e-05, - "loss": 3.1146, - "step": 49285 - }, - { - "epoch": 0.85, - "learning_rate": 3.195917945461104e-05, - "loss": 3.2203, - "step": 49290 - }, - { - "epoch": 0.85, - "learning_rate": 3.1922242317901805e-05, - "loss": 3.2573, - "step": 49295 - }, - { - "epoch": 0.85, - "learning_rate": 3.188532533926601e-05, - "loss": 3.0809, - "step": 49300 - }, - { - "epoch": 0.85, - "learning_rate": 3.184842852147974e-05, - "loss": 3.0864, - "step": 49305 - }, - { - "epoch": 0.85, - "learning_rate": 3.1811551867317474e-05, - "loss": 3.0008, - "step": 49310 - }, - { - "epoch": 0.85, - "learning_rate": 3.177469537955193e-05, - "loss": 3.1571, - "step": 49315 - }, - { - "epoch": 0.85, - "learning_rate": 3.173785906095469e-05, - "loss": 3.1161, - "step": 49320 - }, - { - "epoch": 0.85, - "learning_rate": 3.170104291429566e-05, - "loss": 3.1041, - "step": 49325 - }, - { - "epoch": 0.85, - "learning_rate": 3.166424694234314e-05, - "loss": 3.1521, - "step": 49330 - }, - { - "epoch": 0.85, - "learning_rate": 3.1627471147864145e-05, - "loss": 3.1175, - "step": 49335 - }, - { - "epoch": 0.85, - "learning_rate": 3.1590715533623835e-05, - "loss": 3.1429, - "step": 49340 - }, - { - "epoch": 0.85, - "learning_rate": 3.155398010238619e-05, - "loss": 3.1026, - "step": 49345 - }, - { - "epoch": 0.85, - "learning_rate": 3.151726485691347e-05, - "loss": 3.0831, - "step": 49350 - }, - { - "epoch": 0.85, - "learning_rate": 3.148056979996644e-05, - "loss": 3.126, - "step": 49355 - }, - { - "epoch": 0.85, - "learning_rate": 3.1443894934304516e-05, - "loss": 3.0901, - "step": 49360 - }, - { - "epoch": 0.85, - "learning_rate": 3.140724026268526e-05, - "loss": 3.0804, - "step": 49365 - }, - { - "epoch": 0.85, - "learning_rate": 3.1370605787865075e-05, - "loss": 3.0141, - "step": 49370 - }, - { - "epoch": 0.85, - "learning_rate": 3.133399151259862e-05, - "loss": 3.0604, - "step": 49375 - }, - { - "epoch": 0.85, - "learning_rate": 3.129739743963906e-05, - "loss": 3.1119, - "step": 49380 - }, - { - "epoch": 0.85, - "learning_rate": 3.126082357173823e-05, - "loss": 3.1145, - "step": 49385 - }, - { - "epoch": 0.85, - "learning_rate": 3.122426991164621e-05, - "loss": 3.057, - "step": 49390 - }, - { - "epoch": 0.85, - "learning_rate": 3.1187736462111535e-05, - "loss": 3.0792, - "step": 49395 - }, - { - "epoch": 0.85, - "learning_rate": 3.115122322588152e-05, - "loss": 3.2464, - "step": 49400 - }, - { - "epoch": 0.86, - "learning_rate": 3.1114730205701664e-05, - "loss": 3.1848, - "step": 49405 - }, - { - "epoch": 0.86, - "learning_rate": 3.107825740431602e-05, - "loss": 3.0722, - "step": 49410 - }, - { - "epoch": 0.86, - "learning_rate": 3.1041804824467355e-05, - "loss": 3.1624, - "step": 49415 - }, - { - "epoch": 0.86, - "learning_rate": 3.1005372468896436e-05, - "loss": 3.158, - "step": 49420 - }, - { - "epoch": 0.86, - "learning_rate": 3.096896034034302e-05, - "loss": 3.1386, - "step": 49425 - }, - { - "epoch": 0.86, - "learning_rate": 3.0932568441545e-05, - "loss": 3.1838, - "step": 49430 - }, - { - "epoch": 0.86, - "learning_rate": 3.0896196775238834e-05, - "loss": 3.1638, - "step": 49435 - }, - { - "epoch": 0.86, - "learning_rate": 3.0859845344159626e-05, - "loss": 3.1109, - "step": 49440 - }, - { - "epoch": 0.86, - "learning_rate": 3.0823514151040664e-05, - "loss": 3.125, - "step": 49445 - }, - { - "epoch": 0.86, - "learning_rate": 3.078720319861395e-05, - "loss": 3.0291, - "step": 49450 - }, - { - "epoch": 0.86, - "learning_rate": 3.075091248960988e-05, - "loss": 3.0649, - "step": 49455 - }, - { - "epoch": 0.86, - "learning_rate": 3.071464202675723e-05, - "loss": 3.0849, - "step": 49460 - }, - { - "epoch": 0.86, - "learning_rate": 3.0678391812783586e-05, - "loss": 3.1765, - "step": 49465 - }, - { - "epoch": 0.86, - "learning_rate": 3.0642161850414536e-05, - "loss": 3.2217, - "step": 49470 - }, - { - "epoch": 0.86, - "learning_rate": 3.0605952142374434e-05, - "loss": 3.0691, - "step": 49475 - }, - { - "epoch": 0.86, - "learning_rate": 3.056976269138617e-05, - "loss": 3.179, - "step": 49480 - }, - { - "epoch": 0.86, - "learning_rate": 3.0533593500170925e-05, - "loss": 3.1283, - "step": 49485 - }, - { - "epoch": 0.86, - "learning_rate": 3.049744457144847e-05, - "loss": 3.0821, - "step": 49490 - }, - { - "epoch": 0.86, - "learning_rate": 3.0461315907936988e-05, - "loss": 2.9608, - "step": 49495 - }, - { - "epoch": 0.86, - "learning_rate": 3.0425207512353144e-05, - "loss": 3.1708, - "step": 49500 - }, - { - "epoch": 0.86, - "eval_loss": 3.161355495452881, - "eval_runtime": 151.0729, - "eval_samples_per_second": 12.186, - "eval_steps_per_second": 0.768, - "step": 49500 - }, - { - "epoch": 0.86, - "learning_rate": 3.0389119387412232e-05, - "loss": 3.1972, - "step": 49505 - }, - { - "epoch": 0.86, - "learning_rate": 3.0353051535827776e-05, - "loss": 3.2604, - "step": 49510 - }, - { - "epoch": 0.86, - "learning_rate": 3.0317003960311914e-05, - "loss": 3.0992, - "step": 49515 - }, - { - "epoch": 0.86, - "learning_rate": 3.0280976663575275e-05, - "loss": 3.1832, - "step": 49520 - }, - { - "epoch": 0.86, - "learning_rate": 3.0244969648326822e-05, - "loss": 3.1101, - "step": 49525 - }, - { - "epoch": 0.86, - "learning_rate": 3.020898291727426e-05, - "loss": 3.2827, - "step": 49530 - }, - { - "epoch": 0.86, - "learning_rate": 3.017301647312349e-05, - "loss": 3.2216, - "step": 49535 - }, - { - "epoch": 0.86, - "learning_rate": 3.013707031857908e-05, - "loss": 3.2332, - "step": 49540 - }, - { - "epoch": 0.86, - "learning_rate": 3.010114445634394e-05, - "loss": 3.1223, - "step": 49545 - }, - { - "epoch": 0.86, - "learning_rate": 3.0065238889119536e-05, - "loss": 3.1019, - "step": 49550 - }, - { - "epoch": 0.86, - "learning_rate": 3.002935361960571e-05, - "loss": 3.1655, - "step": 49555 - }, - { - "epoch": 0.86, - "learning_rate": 2.9993488650500975e-05, - "loss": 3.3006, - "step": 49560 - }, - { - "epoch": 0.86, - "learning_rate": 2.99576439845021e-05, - "loss": 3.1436, - "step": 49565 - }, - { - "epoch": 0.86, - "learning_rate": 2.99218196243045e-05, - "loss": 3.0918, - "step": 49570 - }, - { - "epoch": 0.86, - "learning_rate": 2.988601557260192e-05, - "loss": 3.0952, - "step": 49575 - }, - { - "epoch": 0.86, - "learning_rate": 2.9850231832086603e-05, - "loss": 3.0979, - "step": 49580 - }, - { - "epoch": 0.86, - "learning_rate": 2.98144684054494e-05, - "loss": 3.0616, - "step": 49585 - }, - { - "epoch": 0.86, - "learning_rate": 2.977872529537949e-05, - "loss": 3.1357, - "step": 49590 - }, - { - "epoch": 0.86, - "learning_rate": 2.9743002504564596e-05, - "loss": 3.0553, - "step": 49595 - }, - { - "epoch": 0.86, - "learning_rate": 2.970730003569083e-05, - "loss": 3.0758, - "step": 49600 - }, - { - "epoch": 0.86, - "learning_rate": 2.9671617891442846e-05, - "loss": 3.2001, - "step": 49605 - }, - { - "epoch": 0.86, - "learning_rate": 2.9635956074503865e-05, - "loss": 3.1336, - "step": 49610 - }, - { - "epoch": 0.86, - "learning_rate": 2.960031458755534e-05, - "loss": 3.0801, - "step": 49615 - }, - { - "epoch": 0.86, - "learning_rate": 2.956469343327743e-05, - "loss": 3.2049, - "step": 49620 - }, - { - "epoch": 0.86, - "learning_rate": 2.9529092614348592e-05, - "loss": 3.1411, - "step": 49625 - }, - { - "epoch": 0.86, - "learning_rate": 2.949351213344585e-05, - "loss": 3.1156, - "step": 49630 - }, - { - "epoch": 0.86, - "learning_rate": 2.945795199324463e-05, - "loss": 3.2328, - "step": 49635 - }, - { - "epoch": 0.86, - "learning_rate": 2.9422412196418932e-05, - "loss": 3.1396, - "step": 49640 - }, - { - "epoch": 0.86, - "learning_rate": 2.938689274564118e-05, - "loss": 3.0213, - "step": 49645 - }, - { - "epoch": 0.86, - "learning_rate": 2.9351393643582232e-05, - "loss": 3.1359, - "step": 49650 - }, - { - "epoch": 0.86, - "learning_rate": 2.9315914892911396e-05, - "loss": 3.1194, - "step": 49655 - }, - { - "epoch": 0.86, - "learning_rate": 2.92804564962965e-05, - "loss": 3.135, - "step": 49660 - }, - { - "epoch": 0.86, - "learning_rate": 2.924501845640391e-05, - "loss": 3.1446, - "step": 49665 - }, - { - "epoch": 0.86, - "learning_rate": 2.9209600775898333e-05, - "loss": 3.1923, - "step": 49670 - }, - { - "epoch": 0.86, - "learning_rate": 2.9174203457443e-05, - "loss": 3.1586, - "step": 49675 - }, - { - "epoch": 0.86, - "learning_rate": 2.9138826503699585e-05, - "loss": 3.2051, - "step": 49680 - }, - { - "epoch": 0.86, - "learning_rate": 2.9103469917328227e-05, - "loss": 3.1811, - "step": 49685 - }, - { - "epoch": 0.86, - "learning_rate": 2.9068133700987693e-05, - "loss": 3.1168, - "step": 49690 - }, - { - "epoch": 0.86, - "learning_rate": 2.9032817857335034e-05, - "loss": 3.2389, - "step": 49695 - }, - { - "epoch": 0.86, - "learning_rate": 2.8997522389025683e-05, - "loss": 3.1057, - "step": 49700 - }, - { - "epoch": 0.86, - "learning_rate": 2.8962247298713827e-05, - "loss": 3.0749, - "step": 49705 - }, - { - "epoch": 0.86, - "learning_rate": 2.8926992589051934e-05, - "loss": 3.1406, - "step": 49710 - }, - { - "epoch": 0.86, - "learning_rate": 2.8891758262690923e-05, - "loss": 3.1404, - "step": 49715 - }, - { - "epoch": 0.86, - "learning_rate": 2.8856544322280374e-05, - "loss": 3.1554, - "step": 49720 - }, - { - "epoch": 0.86, - "learning_rate": 2.882135077046803e-05, - "loss": 3.0868, - "step": 49725 - }, - { - "epoch": 0.86, - "learning_rate": 2.878617760990035e-05, - "loss": 3.2024, - "step": 49730 - }, - { - "epoch": 0.86, - "learning_rate": 2.8751024843222182e-05, - "loss": 3.0496, - "step": 49735 - }, - { - "epoch": 0.86, - "learning_rate": 2.8715892473076773e-05, - "loss": 3.0333, - "step": 49740 - }, - { - "epoch": 0.86, - "learning_rate": 2.8680780502106047e-05, - "loss": 3.192, - "step": 49745 - }, - { - "epoch": 0.86, - "learning_rate": 2.864568893295003e-05, - "loss": 3.0176, - "step": 49750 - }, - { - "epoch": 0.86, - "learning_rate": 2.86106177682476e-05, - "loss": 3.0381, - "step": 49755 - }, - { - "epoch": 0.86, - "learning_rate": 2.8575567010635824e-05, - "loss": 3.1464, - "step": 49760 - }, - { - "epoch": 0.86, - "learning_rate": 2.8540536662750356e-05, - "loss": 3.1332, - "step": 49765 - }, - { - "epoch": 0.86, - "learning_rate": 2.8505526727225457e-05, - "loss": 3.1255, - "step": 49770 - }, - { - "epoch": 0.86, - "learning_rate": 2.8470537206693457e-05, - "loss": 3.1313, - "step": 49775 - }, - { - "epoch": 0.86, - "learning_rate": 2.843556810378548e-05, - "loss": 3.1056, - "step": 49780 - }, - { - "epoch": 0.86, - "learning_rate": 2.8400619421131055e-05, - "loss": 3.2543, - "step": 49785 - }, - { - "epoch": 0.86, - "learning_rate": 2.836569116135815e-05, - "loss": 3.1408, - "step": 49790 - }, - { - "epoch": 0.86, - "learning_rate": 2.8330783327093164e-05, - "loss": 3.1786, - "step": 49795 - }, - { - "epoch": 0.86, - "learning_rate": 2.8295895920960964e-05, - "loss": 3.1208, - "step": 49800 - }, - { - "epoch": 0.86, - "learning_rate": 2.8261028945584918e-05, - "loss": 3.175, - "step": 49805 - }, - { - "epoch": 0.86, - "learning_rate": 2.822618240358686e-05, - "loss": 3.1772, - "step": 49810 - }, - { - "epoch": 0.86, - "learning_rate": 2.8191356297587098e-05, - "loss": 3.0913, - "step": 49815 - }, - { - "epoch": 0.86, - "learning_rate": 2.8156550630204335e-05, - "loss": 3.159, - "step": 49820 - }, - { - "epoch": 0.86, - "learning_rate": 2.8121765404055807e-05, - "loss": 3.0983, - "step": 49825 - }, - { - "epoch": 0.86, - "learning_rate": 2.8087000621757095e-05, - "loss": 3.0529, - "step": 49830 - }, - { - "epoch": 0.86, - "learning_rate": 2.805225628592247e-05, - "loss": 3.1672, - "step": 49835 - }, - { - "epoch": 0.86, - "learning_rate": 2.8017532399164444e-05, - "loss": 3.0621, - "step": 49840 - }, - { - "epoch": 0.86, - "learning_rate": 2.7982828964094062e-05, - "loss": 3.1787, - "step": 49845 - }, - { - "epoch": 0.86, - "learning_rate": 2.794814598332097e-05, - "loss": 3.1399, - "step": 49850 - }, - { - "epoch": 0.86, - "learning_rate": 2.7913483459452978e-05, - "loss": 3.1923, - "step": 49855 - }, - { - "epoch": 0.86, - "learning_rate": 2.7878841395096608e-05, - "loss": 3.1725, - "step": 49860 - }, - { - "epoch": 0.86, - "learning_rate": 2.784421979285677e-05, - "loss": 3.0428, - "step": 49865 - }, - { - "epoch": 0.86, - "learning_rate": 2.7809618655336853e-05, - "loss": 3.1466, - "step": 49870 - }, - { - "epoch": 0.86, - "learning_rate": 2.777503798513867e-05, - "loss": 3.1575, - "step": 49875 - }, - { - "epoch": 0.86, - "learning_rate": 2.774047778486248e-05, - "loss": 3.1834, - "step": 49880 - }, - { - "epoch": 0.86, - "learning_rate": 2.7705938057107e-05, - "loss": 3.1212, - "step": 49885 - }, - { - "epoch": 0.86, - "learning_rate": 2.767141880446956e-05, - "loss": 3.1934, - "step": 49890 - }, - { - "epoch": 0.86, - "learning_rate": 2.763692002954574e-05, - "loss": 3.1929, - "step": 49895 - }, - { - "epoch": 0.86, - "learning_rate": 2.7602441734929737e-05, - "loss": 3.1728, - "step": 49900 - }, - { - "epoch": 0.86, - "learning_rate": 2.7567983923214075e-05, - "loss": 3.2099, - "step": 49905 - }, - { - "epoch": 0.86, - "learning_rate": 2.753354659698982e-05, - "loss": 3.1773, - "step": 49910 - }, - { - "epoch": 0.86, - "learning_rate": 2.7499129758846428e-05, - "loss": 3.1222, - "step": 49915 - }, - { - "epoch": 0.86, - "learning_rate": 2.7464733411372032e-05, - "loss": 3.0571, - "step": 49920 - }, - { - "epoch": 0.86, - "learning_rate": 2.7430357557152926e-05, - "loss": 3.1069, - "step": 49925 - }, - { - "epoch": 0.86, - "learning_rate": 2.7396002198774047e-05, - "loss": 3.1298, - "step": 49930 - }, - { - "epoch": 0.86, - "learning_rate": 2.7361667338818728e-05, - "loss": 3.1509, - "step": 49935 - }, - { - "epoch": 0.86, - "learning_rate": 2.7327352979868734e-05, - "loss": 3.0369, - "step": 49940 - }, - { - "epoch": 0.86, - "learning_rate": 2.7293059124504403e-05, - "loss": 3.0928, - "step": 49945 - }, - { - "epoch": 0.86, - "learning_rate": 2.725878577530444e-05, - "loss": 3.2015, - "step": 49950 - }, - { - "epoch": 0.86, - "learning_rate": 2.722453293484601e-05, - "loss": 3.0879, - "step": 49955 - }, - { - "epoch": 0.86, - "learning_rate": 2.719030060570473e-05, - "loss": 3.0246, - "step": 49960 - }, - { - "epoch": 0.86, - "learning_rate": 2.7156088790454666e-05, - "loss": 3.059, - "step": 49965 - }, - { - "epoch": 0.86, - "learning_rate": 2.7121897491668498e-05, - "loss": 3.2224, - "step": 49970 - }, - { - "epoch": 0.86, - "learning_rate": 2.7087726711917202e-05, - "loss": 3.1346, - "step": 49975 - }, - { - "epoch": 0.86, - "learning_rate": 2.705357645377012e-05, - "loss": 3.0982, - "step": 49980 - }, - { - "epoch": 0.87, - "learning_rate": 2.7019446719795266e-05, - "loss": 3.1323, - "step": 49985 - }, - { - "epoch": 0.87, - "learning_rate": 2.698533751255906e-05, - "loss": 3.1328, - "step": 49990 - }, - { - "epoch": 0.87, - "learning_rate": 2.6951248834626237e-05, - "loss": 3.1338, - "step": 49995 - }, - { - "epoch": 0.87, - "learning_rate": 2.6917180688560224e-05, - "loss": 3.1056, - "step": 50000 - }, - { - "epoch": 0.87, - "eval_loss": 3.159515380859375, - "eval_runtime": 151.4692, - "eval_samples_per_second": 12.154, - "eval_steps_per_second": 0.766, - "step": 50000 - }, - { - "epoch": 0.87, - "learning_rate": 2.6883133076922603e-05, - "loss": 2.9081, - "step": 50005 - }, - { - "epoch": 0.87, - "learning_rate": 2.6849106002273758e-05, - "loss": 3.2069, - "step": 50010 - }, - { - "epoch": 0.87, - "learning_rate": 2.681509946717224e-05, - "loss": 3.1787, - "step": 50015 - }, - { - "epoch": 0.87, - "learning_rate": 2.6781113474175108e-05, - "loss": 3.1832, - "step": 50020 - }, - { - "epoch": 0.87, - "learning_rate": 2.6747148025838182e-05, - "loss": 3.2865, - "step": 50025 - }, - { - "epoch": 0.87, - "learning_rate": 2.6713203124715188e-05, - "loss": 3.0681, - "step": 50030 - }, - { - "epoch": 0.87, - "learning_rate": 2.667927877335878e-05, - "loss": 3.2357, - "step": 50035 - }, - { - "epoch": 0.87, - "learning_rate": 2.6645374974319888e-05, - "loss": 3.0369, - "step": 50040 - }, - { - "epoch": 0.87, - "learning_rate": 2.6611491730147838e-05, - "loss": 3.1996, - "step": 50045 - }, - { - "epoch": 0.87, - "learning_rate": 2.657762904339059e-05, - "loss": 3.122, - "step": 50050 - }, - { - "epoch": 0.87, - "learning_rate": 2.6543786916594345e-05, - "loss": 3.1339, - "step": 50055 - }, - { - "epoch": 0.87, - "learning_rate": 2.650996535230383e-05, - "loss": 3.1953, - "step": 50060 - }, - { - "epoch": 0.87, - "learning_rate": 2.6476164353062347e-05, - "loss": 3.1142, - "step": 50065 - }, - { - "epoch": 0.87, - "learning_rate": 2.6442383921411493e-05, - "loss": 3.1482, - "step": 50070 - }, - { - "epoch": 0.87, - "learning_rate": 2.6408624059891405e-05, - "loss": 3.1586, - "step": 50075 - }, - { - "epoch": 0.87, - "learning_rate": 2.6374884771040716e-05, - "loss": 3.2238, - "step": 50080 - }, - { - "epoch": 0.87, - "learning_rate": 2.6341166057396302e-05, - "loss": 3.2036, - "step": 50085 - }, - { - "epoch": 0.87, - "learning_rate": 2.6307467921493762e-05, - "loss": 3.1131, - "step": 50090 - }, - { - "epoch": 0.87, - "learning_rate": 2.627379036586701e-05, - "loss": 3.1085, - "step": 50095 - }, - { - "epoch": 0.87, - "learning_rate": 2.6240133393048312e-05, - "loss": 3.1046, - "step": 50100 - }, - { - "epoch": 0.87, - "learning_rate": 2.6206497005568715e-05, - "loss": 3.2293, - "step": 50105 - }, - { - "epoch": 0.87, - "learning_rate": 2.6172881205957263e-05, - "loss": 3.1353, - "step": 50110 - }, - { - "epoch": 0.87, - "learning_rate": 2.613928599674183e-05, - "loss": 3.138, - "step": 50115 - }, - { - "epoch": 0.87, - "learning_rate": 2.6105711380448602e-05, - "loss": 3.1761, - "step": 50120 - }, - { - "epoch": 0.87, - "learning_rate": 2.6072157359602152e-05, - "loss": 3.2065, - "step": 50125 - }, - { - "epoch": 0.87, - "learning_rate": 2.60386239367257e-05, - "loss": 3.1544, - "step": 50130 - }, - { - "epoch": 0.87, - "learning_rate": 2.6005111114340628e-05, - "loss": 3.1291, - "step": 50135 - }, - { - "epoch": 0.87, - "learning_rate": 2.597161889496695e-05, - "loss": 3.0625, - "step": 50140 - }, - { - "epoch": 0.87, - "learning_rate": 2.5938147281123222e-05, - "loss": 3.2257, - "step": 50145 - }, - { - "epoch": 0.87, - "learning_rate": 2.5904696275326296e-05, - "loss": 3.1269, - "step": 50150 - }, - { - "epoch": 0.87, - "learning_rate": 2.5871265880091462e-05, - "loss": 3.2053, - "step": 50155 - }, - { - "epoch": 0.87, - "learning_rate": 2.5837856097932576e-05, - "loss": 3.2384, - "step": 50160 - }, - { - "epoch": 0.87, - "learning_rate": 2.5804466931361756e-05, - "loss": 3.1289, - "step": 50165 - }, - { - "epoch": 0.87, - "learning_rate": 2.5771098382889866e-05, - "loss": 3.108, - "step": 50170 - }, - { - "epoch": 0.87, - "learning_rate": 2.5737750455025997e-05, - "loss": 3.0458, - "step": 50175 - }, - { - "epoch": 0.87, - "learning_rate": 2.570442315027771e-05, - "loss": 3.2086, - "step": 50180 - }, - { - "epoch": 0.87, - "learning_rate": 2.567111647115103e-05, - "loss": 3.0521, - "step": 50185 - }, - { - "epoch": 0.87, - "learning_rate": 2.5637830420150462e-05, - "loss": 3.13, - "step": 50190 - }, - { - "epoch": 0.87, - "learning_rate": 2.5604564999779e-05, - "loss": 3.0953, - "step": 50195 - }, - { - "epoch": 0.87, - "learning_rate": 2.5571320212538003e-05, - "loss": 3.0438, - "step": 50200 - }, - { - "epoch": 0.87, - "learning_rate": 2.5538096060927315e-05, - "loss": 2.9951, - "step": 50205 - }, - { - "epoch": 0.87, - "learning_rate": 2.5504892547445165e-05, - "loss": 3.152, - "step": 50210 - }, - { - "epoch": 0.87, - "learning_rate": 2.547170967458836e-05, - "loss": 3.0814, - "step": 50215 - }, - { - "epoch": 0.87, - "learning_rate": 2.5438547444852e-05, - "loss": 3.1292, - "step": 50220 - }, - { - "epoch": 0.87, - "learning_rate": 2.5405405860729822e-05, - "loss": 3.01, - "step": 50225 - }, - { - "epoch": 0.87, - "learning_rate": 2.5372284924713838e-05, - "loss": 3.1121, - "step": 50230 - }, - { - "epoch": 0.87, - "learning_rate": 2.5339184639294586e-05, - "loss": 3.0874, - "step": 50235 - }, - { - "epoch": 0.87, - "learning_rate": 2.530610500696104e-05, - "loss": 3.0359, - "step": 50240 - }, - { - "epoch": 0.87, - "learning_rate": 2.527304603020055e-05, - "loss": 3.1531, - "step": 50245 - }, - { - "epoch": 0.87, - "learning_rate": 2.5240007711499122e-05, - "loss": 3.1489, - "step": 50250 - }, - { - "epoch": 0.87, - "learning_rate": 2.520699005334097e-05, - "loss": 3.1096, - "step": 50255 - }, - { - "epoch": 0.87, - "learning_rate": 2.5173993058208874e-05, - "loss": 3.147, - "step": 50260 - }, - { - "epoch": 0.87, - "learning_rate": 2.5141016728584052e-05, - "loss": 3.0463, - "step": 50265 - }, - { - "epoch": 0.87, - "learning_rate": 2.510806106694605e-05, - "loss": 2.9812, - "step": 50270 - }, - { - "epoch": 0.87, - "learning_rate": 2.5075126075773156e-05, - "loss": 3.179, - "step": 50275 - }, - { - "epoch": 0.87, - "learning_rate": 2.5042211757541852e-05, - "loss": 3.1458, - "step": 50280 - }, - { - "epoch": 0.87, - "learning_rate": 2.5009318114726995e-05, - "loss": 2.9818, - "step": 50285 - }, - { - "epoch": 0.87, - "learning_rate": 2.4976445149802137e-05, - "loss": 3.1364, - "step": 50290 - }, - { - "epoch": 0.87, - "learning_rate": 2.4943592865239135e-05, - "loss": 3.2002, - "step": 50295 - }, - { - "epoch": 0.87, - "learning_rate": 2.491076126350825e-05, - "loss": 3.0954, - "step": 50300 - }, - { - "epoch": 0.87, - "learning_rate": 2.4877950347078402e-05, - "loss": 3.1573, - "step": 50305 - }, - { - "epoch": 0.87, - "learning_rate": 2.484516011841665e-05, - "loss": 3.1059, - "step": 50310 - }, - { - "epoch": 0.87, - "learning_rate": 2.481239057998876e-05, - "loss": 3.1351, - "step": 50315 - }, - { - "epoch": 0.87, - "learning_rate": 2.4779641734258758e-05, - "loss": 3.0687, - "step": 50320 - }, - { - "epoch": 0.87, - "learning_rate": 2.4746913583689175e-05, - "loss": 2.9391, - "step": 50325 - }, - { - "epoch": 0.87, - "learning_rate": 2.4714206130741078e-05, - "loss": 3.0373, - "step": 50330 - }, - { - "epoch": 0.87, - "learning_rate": 2.4681519377873893e-05, - "loss": 3.1751, - "step": 50335 - }, - { - "epoch": 0.87, - "learning_rate": 2.4648853327545427e-05, - "loss": 3.0945, - "step": 50340 - }, - { - "epoch": 0.87, - "learning_rate": 2.4616207982212076e-05, - "loss": 3.0537, - "step": 50345 - }, - { - "epoch": 0.87, - "learning_rate": 2.4583583344328485e-05, - "loss": 3.1123, - "step": 50350 - }, - { - "epoch": 0.87, - "learning_rate": 2.4550979416348015e-05, - "loss": 3.0615, - "step": 50355 - }, - { - "epoch": 0.87, - "learning_rate": 2.451839620072228e-05, - "loss": 3.1374, - "step": 50360 - }, - { - "epoch": 0.87, - "learning_rate": 2.4485833699901212e-05, - "loss": 3.1211, - "step": 50365 - }, - { - "epoch": 0.87, - "learning_rate": 2.4453291916333528e-05, - "loss": 3.1406, - "step": 50370 - }, - { - "epoch": 0.87, - "learning_rate": 2.442077085246613e-05, - "loss": 3.162, - "step": 50375 - }, - { - "epoch": 0.87, - "learning_rate": 2.43882705107444e-05, - "loss": 3.1189, - "step": 50380 - }, - { - "epoch": 0.87, - "learning_rate": 2.4355790893612347e-05, - "loss": 3.0061, - "step": 50385 - }, - { - "epoch": 0.87, - "learning_rate": 2.432333200351205e-05, - "loss": 3.1955, - "step": 50390 - }, - { - "epoch": 0.87, - "learning_rate": 2.429089384288443e-05, - "loss": 3.2551, - "step": 50395 - }, - { - "epoch": 0.87, - "learning_rate": 2.4258476414168594e-05, - "loss": 2.9694, - "step": 50400 - }, - { - "epoch": 0.87, - "learning_rate": 2.4226079719802128e-05, - "loss": 3.0898, - "step": 50405 - }, - { - "epoch": 0.87, - "learning_rate": 2.419370376222125e-05, - "loss": 3.2238, - "step": 50410 - }, - { - "epoch": 0.87, - "learning_rate": 2.4161348543860316e-05, - "loss": 3.0639, - "step": 50415 - }, - { - "epoch": 0.87, - "learning_rate": 2.4129014067152275e-05, - "loss": 3.2044, - "step": 50420 - }, - { - "epoch": 0.87, - "learning_rate": 2.4096700334528618e-05, - "loss": 3.0987, - "step": 50425 - }, - { - "epoch": 0.87, - "learning_rate": 2.40644073484191e-05, - "loss": 3.1207, - "step": 50430 - }, - { - "epoch": 0.87, - "learning_rate": 2.4032135111251983e-05, - "loss": 3.0766, - "step": 50435 - }, - { - "epoch": 0.87, - "learning_rate": 2.3999883625454018e-05, - "loss": 3.0879, - "step": 50440 - }, - { - "epoch": 0.87, - "learning_rate": 2.3967652893450274e-05, - "loss": 3.153, - "step": 50445 - }, - { - "epoch": 0.87, - "learning_rate": 2.393544291766444e-05, - "loss": 3.1683, - "step": 50450 - }, - { - "epoch": 0.87, - "learning_rate": 2.3903253700518477e-05, - "loss": 3.1308, - "step": 50455 - }, - { - "epoch": 0.87, - "learning_rate": 2.3871085244432887e-05, - "loss": 3.1629, - "step": 50460 - }, - { - "epoch": 0.87, - "learning_rate": 2.383893755182653e-05, - "loss": 3.0093, - "step": 50465 - }, - { - "epoch": 0.87, - "learning_rate": 2.3806810625116713e-05, - "loss": 3.0746, - "step": 50470 - }, - { - "epoch": 0.87, - "learning_rate": 2.377470446671933e-05, - "loss": 3.1127, - "step": 50475 - }, - { - "epoch": 0.87, - "learning_rate": 2.374261907904852e-05, - "loss": 2.9787, - "step": 50480 - }, - { - "epoch": 0.87, - "learning_rate": 2.3710554464516985e-05, - "loss": 3.0884, - "step": 50485 - }, - { - "epoch": 0.87, - "learning_rate": 2.3678510625535794e-05, - "loss": 3.1472, - "step": 50490 - }, - { - "epoch": 0.87, - "learning_rate": 2.364648756451446e-05, - "loss": 3.0945, - "step": 50495 - }, - { - "epoch": 0.87, - "learning_rate": 2.3614485283860918e-05, - "loss": 2.9874, - "step": 50500 - }, - { - "epoch": 0.87, - "eval_loss": 3.15816330909729, - "eval_runtime": 151.1837, - "eval_samples_per_second": 12.177, - "eval_steps_per_second": 0.767, - "step": 50500 - } - ], - "logging_steps": 5, - "max_steps": 57783, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.500304248346835e+18, - "trial_name": null, - "trial_params": null -}