{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00027586206896551725, "grad_norm": 11.61561393737793, "learning_rate": 1.1025358324145536e-08, "loss": 1.6007, "step": 1 }, { "epoch": 0.0005517241379310345, "grad_norm": 13.754886627197266, "learning_rate": 2.205071664829107e-08, "loss": 1.5575, "step": 2 }, { "epoch": 0.0008275862068965517, "grad_norm": 13.544987678527832, "learning_rate": 3.307607497243661e-08, "loss": 1.5486, "step": 3 }, { "epoch": 0.001103448275862069, "grad_norm": 10.528044700622559, "learning_rate": 4.410143329658214e-08, "loss": 1.499, "step": 4 }, { "epoch": 0.001379310344827586, "grad_norm": 12.478899002075195, "learning_rate": 5.512679162072768e-08, "loss": 1.6523, "step": 5 }, { "epoch": 0.0016551724137931034, "grad_norm": 14.603178024291992, "learning_rate": 6.615214994487321e-08, "loss": 1.543, "step": 6 }, { "epoch": 0.0019310344827586207, "grad_norm": 9.980120658874512, "learning_rate": 7.717750826901874e-08, "loss": 1.6589, "step": 7 }, { "epoch": 0.002206896551724138, "grad_norm": 13.890907287597656, "learning_rate": 8.820286659316428e-08, "loss": 1.712, "step": 8 }, { "epoch": 0.0024827586206896553, "grad_norm": 13.227901458740234, "learning_rate": 9.922822491730983e-08, "loss": 1.446, "step": 9 }, { "epoch": 0.002758620689655172, "grad_norm": 11.885655403137207, "learning_rate": 1.1025358324145536e-07, "loss": 1.379, "step": 10 }, { "epoch": 0.0030344827586206895, "grad_norm": 13.944063186645508, "learning_rate": 1.2127894156560089e-07, "loss": 1.666, "step": 11 }, { "epoch": 0.003310344827586207, "grad_norm": 12.290889739990234, "learning_rate": 1.3230429988974643e-07, "loss": 1.3787, "step": 12 }, { "epoch": 0.003586206896551724, "grad_norm": 11.835646629333496, "learning_rate": 1.4332965821389197e-07, "loss": 1.4, "step": 13 }, { "epoch": 0.0038620689655172414, "grad_norm": 12.398616790771484, "learning_rate": 1.5435501653803749e-07, "loss": 1.5229, "step": 14 }, { "epoch": 0.004137931034482759, "grad_norm": 10.99663257598877, "learning_rate": 1.6538037486218305e-07, "loss": 1.6727, "step": 15 }, { "epoch": 0.004413793103448276, "grad_norm": 14.649194717407227, "learning_rate": 1.7640573318632857e-07, "loss": 1.6946, "step": 16 }, { "epoch": 0.004689655172413793, "grad_norm": 12.16015338897705, "learning_rate": 1.8743109151047409e-07, "loss": 1.6758, "step": 17 }, { "epoch": 0.004965517241379311, "grad_norm": 12.593822479248047, "learning_rate": 1.9845644983461965e-07, "loss": 1.5959, "step": 18 }, { "epoch": 0.005241379310344828, "grad_norm": 10.60006046295166, "learning_rate": 2.0948180815876517e-07, "loss": 1.4825, "step": 19 }, { "epoch": 0.005517241379310344, "grad_norm": 11.851210594177246, "learning_rate": 2.205071664829107e-07, "loss": 1.4897, "step": 20 }, { "epoch": 0.005793103448275862, "grad_norm": 16.31206512451172, "learning_rate": 2.3153252480705625e-07, "loss": 1.6748, "step": 21 }, { "epoch": 0.006068965517241379, "grad_norm": 12.961336135864258, "learning_rate": 2.4255788313120177e-07, "loss": 1.5844, "step": 22 }, { "epoch": 0.006344827586206896, "grad_norm": 12.903078079223633, "learning_rate": 2.535832414553473e-07, "loss": 1.4159, "step": 23 }, { "epoch": 0.006620689655172414, "grad_norm": 11.157917976379395, "learning_rate": 2.6460859977949285e-07, "loss": 1.497, "step": 24 }, { "epoch": 0.006896551724137931, "grad_norm": 14.281108856201172, "learning_rate": 2.756339581036384e-07, "loss": 1.6588, "step": 25 }, { "epoch": 0.007172413793103448, "grad_norm": 9.399060249328613, "learning_rate": 2.8665931642778394e-07, "loss": 1.4061, "step": 26 }, { "epoch": 0.0074482758620689656, "grad_norm": 13.083234786987305, "learning_rate": 2.976846747519295e-07, "loss": 1.6166, "step": 27 }, { "epoch": 0.007724137931034483, "grad_norm": 14.169229507446289, "learning_rate": 3.0871003307607497e-07, "loss": 1.5804, "step": 28 }, { "epoch": 0.008, "grad_norm": 11.60746955871582, "learning_rate": 3.1973539140022057e-07, "loss": 1.4601, "step": 29 }, { "epoch": 0.008275862068965517, "grad_norm": 9.871387481689453, "learning_rate": 3.307607497243661e-07, "loss": 1.4486, "step": 30 }, { "epoch": 0.008551724137931035, "grad_norm": 11.829644203186035, "learning_rate": 3.417861080485116e-07, "loss": 1.7223, "step": 31 }, { "epoch": 0.008827586206896552, "grad_norm": 10.686468124389648, "learning_rate": 3.5281146637265714e-07, "loss": 1.5511, "step": 32 }, { "epoch": 0.00910344827586207, "grad_norm": 10.522947311401367, "learning_rate": 3.638368246968027e-07, "loss": 1.6018, "step": 33 }, { "epoch": 0.009379310344827587, "grad_norm": 10.392460823059082, "learning_rate": 3.7486218302094817e-07, "loss": 1.4165, "step": 34 }, { "epoch": 0.009655172413793104, "grad_norm": 10.431242942810059, "learning_rate": 3.8588754134509377e-07, "loss": 1.3653, "step": 35 }, { "epoch": 0.009931034482758621, "grad_norm": 9.096525192260742, "learning_rate": 3.969128996692393e-07, "loss": 1.3196, "step": 36 }, { "epoch": 0.010206896551724139, "grad_norm": 8.109827041625977, "learning_rate": 4.079382579933848e-07, "loss": 1.498, "step": 37 }, { "epoch": 0.010482758620689656, "grad_norm": 8.71412467956543, "learning_rate": 4.1896361631753034e-07, "loss": 1.4604, "step": 38 }, { "epoch": 0.010758620689655173, "grad_norm": 7.670372009277344, "learning_rate": 4.2998897464167594e-07, "loss": 1.4983, "step": 39 }, { "epoch": 0.011034482758620689, "grad_norm": 6.797059535980225, "learning_rate": 4.410143329658214e-07, "loss": 1.3734, "step": 40 }, { "epoch": 0.011310344827586206, "grad_norm": 6.465442180633545, "learning_rate": 4.5203969128996697e-07, "loss": 1.4048, "step": 41 }, { "epoch": 0.011586206896551723, "grad_norm": 6.323879241943359, "learning_rate": 4.630650496141125e-07, "loss": 1.3911, "step": 42 }, { "epoch": 0.01186206896551724, "grad_norm": 7.439223289489746, "learning_rate": 4.74090407938258e-07, "loss": 1.3607, "step": 43 }, { "epoch": 0.012137931034482758, "grad_norm": 6.599254608154297, "learning_rate": 4.851157662624035e-07, "loss": 1.3674, "step": 44 }, { "epoch": 0.012413793103448275, "grad_norm": 6.347580909729004, "learning_rate": 4.961411245865491e-07, "loss": 1.35, "step": 45 }, { "epoch": 0.012689655172413793, "grad_norm": 5.824375152587891, "learning_rate": 5.071664829106946e-07, "loss": 1.2774, "step": 46 }, { "epoch": 0.01296551724137931, "grad_norm": 6.596009731292725, "learning_rate": 5.181918412348401e-07, "loss": 1.4911, "step": 47 }, { "epoch": 0.013241379310344827, "grad_norm": 6.105870246887207, "learning_rate": 5.292171995589857e-07, "loss": 1.4404, "step": 48 }, { "epoch": 0.013517241379310345, "grad_norm": 6.756892204284668, "learning_rate": 5.402425578831313e-07, "loss": 1.2579, "step": 49 }, { "epoch": 0.013793103448275862, "grad_norm": 6.318734169006348, "learning_rate": 5.512679162072768e-07, "loss": 1.4909, "step": 50 }, { "epoch": 0.01406896551724138, "grad_norm": 6.299753665924072, "learning_rate": 5.622932745314223e-07, "loss": 1.2762, "step": 51 }, { "epoch": 0.014344827586206896, "grad_norm": 6.019251346588135, "learning_rate": 5.733186328555679e-07, "loss": 1.3581, "step": 52 }, { "epoch": 0.014620689655172414, "grad_norm": 6.803472995758057, "learning_rate": 5.843439911797134e-07, "loss": 1.3067, "step": 53 }, { "epoch": 0.014896551724137931, "grad_norm": 6.126049995422363, "learning_rate": 5.95369349503859e-07, "loss": 1.3403, "step": 54 }, { "epoch": 0.015172413793103448, "grad_norm": 5.728592395782471, "learning_rate": 6.063947078280045e-07, "loss": 1.275, "step": 55 }, { "epoch": 0.015448275862068966, "grad_norm": 6.171160697937012, "learning_rate": 6.174200661521499e-07, "loss": 1.4031, "step": 56 }, { "epoch": 0.01572413793103448, "grad_norm": 5.886373519897461, "learning_rate": 6.284454244762955e-07, "loss": 1.413, "step": 57 }, { "epoch": 0.016, "grad_norm": 5.829761981964111, "learning_rate": 6.394707828004411e-07, "loss": 1.5584, "step": 58 }, { "epoch": 0.016275862068965516, "grad_norm": 6.071510314941406, "learning_rate": 6.504961411245866e-07, "loss": 1.3551, "step": 59 }, { "epoch": 0.016551724137931035, "grad_norm": 5.8160200119018555, "learning_rate": 6.615214994487322e-07, "loss": 1.2991, "step": 60 }, { "epoch": 0.01682758620689655, "grad_norm": 6.241907596588135, "learning_rate": 6.725468577728776e-07, "loss": 1.4269, "step": 61 }, { "epoch": 0.01710344827586207, "grad_norm": 6.230677127838135, "learning_rate": 6.835722160970232e-07, "loss": 1.3072, "step": 62 }, { "epoch": 0.017379310344827585, "grad_norm": 5.978914260864258, "learning_rate": 6.945975744211687e-07, "loss": 1.2946, "step": 63 }, { "epoch": 0.017655172413793104, "grad_norm": 7.002822399139404, "learning_rate": 7.056229327453143e-07, "loss": 1.404, "step": 64 }, { "epoch": 0.01793103448275862, "grad_norm": 6.320220947265625, "learning_rate": 7.166482910694599e-07, "loss": 1.5685, "step": 65 }, { "epoch": 0.01820689655172414, "grad_norm": 5.663773059844971, "learning_rate": 7.276736493936054e-07, "loss": 1.3396, "step": 66 }, { "epoch": 0.018482758620689654, "grad_norm": 5.616995811462402, "learning_rate": 7.386990077177509e-07, "loss": 1.2936, "step": 67 }, { "epoch": 0.018758620689655173, "grad_norm": 5.6175432205200195, "learning_rate": 7.497243660418963e-07, "loss": 1.4, "step": 68 }, { "epoch": 0.01903448275862069, "grad_norm": 5.456417083740234, "learning_rate": 7.607497243660419e-07, "loss": 1.2368, "step": 69 }, { "epoch": 0.019310344827586208, "grad_norm": 5.476500988006592, "learning_rate": 7.717750826901875e-07, "loss": 1.2768, "step": 70 }, { "epoch": 0.019586206896551724, "grad_norm": 5.3919358253479, "learning_rate": 7.82800441014333e-07, "loss": 1.2767, "step": 71 }, { "epoch": 0.019862068965517243, "grad_norm": 5.606067180633545, "learning_rate": 7.938257993384786e-07, "loss": 1.2365, "step": 72 }, { "epoch": 0.020137931034482758, "grad_norm": 5.597115993499756, "learning_rate": 8.04851157662624e-07, "loss": 1.3257, "step": 73 }, { "epoch": 0.020413793103448277, "grad_norm": 5.6076555252075195, "learning_rate": 8.158765159867696e-07, "loss": 1.4214, "step": 74 }, { "epoch": 0.020689655172413793, "grad_norm": 6.1715407371521, "learning_rate": 8.269018743109152e-07, "loss": 1.5029, "step": 75 }, { "epoch": 0.020965517241379312, "grad_norm": 5.707515239715576, "learning_rate": 8.379272326350607e-07, "loss": 1.4821, "step": 76 }, { "epoch": 0.021241379310344827, "grad_norm": 5.848443984985352, "learning_rate": 8.489525909592063e-07, "loss": 1.2964, "step": 77 }, { "epoch": 0.021517241379310346, "grad_norm": 5.179744243621826, "learning_rate": 8.599779492833519e-07, "loss": 1.3815, "step": 78 }, { "epoch": 0.021793103448275862, "grad_norm": 5.23212194442749, "learning_rate": 8.710033076074974e-07, "loss": 1.4686, "step": 79 }, { "epoch": 0.022068965517241378, "grad_norm": 5.363963603973389, "learning_rate": 8.820286659316428e-07, "loss": 1.4411, "step": 80 }, { "epoch": 0.022344827586206897, "grad_norm": 5.478977680206299, "learning_rate": 8.930540242557883e-07, "loss": 1.441, "step": 81 }, { "epoch": 0.022620689655172412, "grad_norm": 5.3605499267578125, "learning_rate": 9.040793825799339e-07, "loss": 1.4924, "step": 82 }, { "epoch": 0.02289655172413793, "grad_norm": 5.276201248168945, "learning_rate": 9.151047409040794e-07, "loss": 1.3567, "step": 83 }, { "epoch": 0.023172413793103447, "grad_norm": 5.4800591468811035, "learning_rate": 9.26130099228225e-07, "loss": 1.2757, "step": 84 }, { "epoch": 0.023448275862068966, "grad_norm": 5.653221607208252, "learning_rate": 9.371554575523706e-07, "loss": 1.3672, "step": 85 }, { "epoch": 0.02372413793103448, "grad_norm": 5.331080436706543, "learning_rate": 9.48180815876516e-07, "loss": 1.3722, "step": 86 }, { "epoch": 0.024, "grad_norm": 5.25465202331543, "learning_rate": 9.592061742006615e-07, "loss": 1.3699, "step": 87 }, { "epoch": 0.024275862068965516, "grad_norm": 5.098982334136963, "learning_rate": 9.70231532524807e-07, "loss": 1.2715, "step": 88 }, { "epoch": 0.024551724137931035, "grad_norm": 5.097829818725586, "learning_rate": 9.812568908489527e-07, "loss": 1.305, "step": 89 }, { "epoch": 0.02482758620689655, "grad_norm": 5.901406764984131, "learning_rate": 9.922822491730983e-07, "loss": 1.4332, "step": 90 }, { "epoch": 0.02510344827586207, "grad_norm": 5.54692268371582, "learning_rate": 1.0033076074972439e-06, "loss": 1.309, "step": 91 }, { "epoch": 0.025379310344827585, "grad_norm": 4.7909650802612305, "learning_rate": 1.0143329658213892e-06, "loss": 1.3635, "step": 92 }, { "epoch": 0.025655172413793104, "grad_norm": 5.354738712310791, "learning_rate": 1.0253583241455348e-06, "loss": 1.3836, "step": 93 }, { "epoch": 0.02593103448275862, "grad_norm": 5.910029888153076, "learning_rate": 1.0363836824696802e-06, "loss": 1.4249, "step": 94 }, { "epoch": 0.02620689655172414, "grad_norm": 6.196692943572998, "learning_rate": 1.0474090407938258e-06, "loss": 1.3889, "step": 95 }, { "epoch": 0.026482758620689655, "grad_norm": 5.580589771270752, "learning_rate": 1.0584343991179714e-06, "loss": 1.3097, "step": 96 }, { "epoch": 0.026758620689655174, "grad_norm": 5.588647842407227, "learning_rate": 1.069459757442117e-06, "loss": 1.3147, "step": 97 }, { "epoch": 0.02703448275862069, "grad_norm": 5.708091735839844, "learning_rate": 1.0804851157662626e-06, "loss": 1.4356, "step": 98 }, { "epoch": 0.027310344827586208, "grad_norm": 5.703024864196777, "learning_rate": 1.091510474090408e-06, "loss": 1.3109, "step": 99 }, { "epoch": 0.027586206896551724, "grad_norm": 5.545051574707031, "learning_rate": 1.1025358324145536e-06, "loss": 1.4645, "step": 100 }, { "epoch": 0.027862068965517243, "grad_norm": 5.4899373054504395, "learning_rate": 1.1135611907386992e-06, "loss": 1.2786, "step": 101 }, { "epoch": 0.02813793103448276, "grad_norm": 5.41928768157959, "learning_rate": 1.1245865490628446e-06, "loss": 1.2141, "step": 102 }, { "epoch": 0.028413793103448277, "grad_norm": 6.180675029754639, "learning_rate": 1.1356119073869902e-06, "loss": 1.4696, "step": 103 }, { "epoch": 0.028689655172413793, "grad_norm": 5.71194314956665, "learning_rate": 1.1466372657111358e-06, "loss": 1.2979, "step": 104 }, { "epoch": 0.028965517241379312, "grad_norm": 5.088263988494873, "learning_rate": 1.1576626240352811e-06, "loss": 1.3131, "step": 105 }, { "epoch": 0.029241379310344828, "grad_norm": 5.251824855804443, "learning_rate": 1.1686879823594267e-06, "loss": 1.3152, "step": 106 }, { "epoch": 0.029517241379310343, "grad_norm": 5.503067493438721, "learning_rate": 1.1797133406835723e-06, "loss": 1.2822, "step": 107 }, { "epoch": 0.029793103448275862, "grad_norm": 5.677896499633789, "learning_rate": 1.190738699007718e-06, "loss": 1.1037, "step": 108 }, { "epoch": 0.030068965517241378, "grad_norm": 5.476407527923584, "learning_rate": 1.2017640573318633e-06, "loss": 1.2868, "step": 109 }, { "epoch": 0.030344827586206897, "grad_norm": 5.205259799957275, "learning_rate": 1.212789415656009e-06, "loss": 1.1952, "step": 110 }, { "epoch": 0.030620689655172412, "grad_norm": 5.264379501342773, "learning_rate": 1.2238147739801543e-06, "loss": 1.2215, "step": 111 }, { "epoch": 0.03089655172413793, "grad_norm": 5.060546398162842, "learning_rate": 1.2348401323042999e-06, "loss": 1.2841, "step": 112 }, { "epoch": 0.031172413793103447, "grad_norm": 5.255555629730225, "learning_rate": 1.2458654906284455e-06, "loss": 1.2635, "step": 113 }, { "epoch": 0.03144827586206896, "grad_norm": 5.389891624450684, "learning_rate": 1.256890848952591e-06, "loss": 1.219, "step": 114 }, { "epoch": 0.031724137931034485, "grad_norm": 5.030769348144531, "learning_rate": 1.2679162072767365e-06, "loss": 1.2514, "step": 115 }, { "epoch": 0.032, "grad_norm": 4.894954204559326, "learning_rate": 1.2789415656008823e-06, "loss": 1.3049, "step": 116 }, { "epoch": 0.032275862068965516, "grad_norm": 5.717860221862793, "learning_rate": 1.2899669239250276e-06, "loss": 1.3274, "step": 117 }, { "epoch": 0.03255172413793103, "grad_norm": 4.946012020111084, "learning_rate": 1.3009922822491732e-06, "loss": 1.1289, "step": 118 }, { "epoch": 0.032827586206896554, "grad_norm": 5.540721893310547, "learning_rate": 1.3120176405733186e-06, "loss": 1.1867, "step": 119 }, { "epoch": 0.03310344827586207, "grad_norm": 5.324296951293945, "learning_rate": 1.3230429988974644e-06, "loss": 1.2141, "step": 120 }, { "epoch": 0.033379310344827585, "grad_norm": 5.411397933959961, "learning_rate": 1.3340683572216098e-06, "loss": 1.442, "step": 121 }, { "epoch": 0.0336551724137931, "grad_norm": 5.014051914215088, "learning_rate": 1.3450937155457552e-06, "loss": 1.1244, "step": 122 }, { "epoch": 0.033931034482758624, "grad_norm": 4.933216571807861, "learning_rate": 1.356119073869901e-06, "loss": 1.3349, "step": 123 }, { "epoch": 0.03420689655172414, "grad_norm": 5.244227886199951, "learning_rate": 1.3671444321940464e-06, "loss": 1.314, "step": 124 }, { "epoch": 0.034482758620689655, "grad_norm": 5.089042663574219, "learning_rate": 1.378169790518192e-06, "loss": 1.456, "step": 125 }, { "epoch": 0.03475862068965517, "grad_norm": 5.73339319229126, "learning_rate": 1.3891951488423374e-06, "loss": 1.2358, "step": 126 }, { "epoch": 0.03503448275862069, "grad_norm": 5.456304550170898, "learning_rate": 1.400220507166483e-06, "loss": 1.156, "step": 127 }, { "epoch": 0.03531034482758621, "grad_norm": 4.978187084197998, "learning_rate": 1.4112458654906286e-06, "loss": 1.2618, "step": 128 }, { "epoch": 0.035586206896551724, "grad_norm": 5.184937477111816, "learning_rate": 1.422271223814774e-06, "loss": 1.458, "step": 129 }, { "epoch": 0.03586206896551724, "grad_norm": 5.324441909790039, "learning_rate": 1.4332965821389197e-06, "loss": 1.3116, "step": 130 }, { "epoch": 0.03613793103448276, "grad_norm": 5.04480504989624, "learning_rate": 1.4443219404630651e-06, "loss": 1.3411, "step": 131 }, { "epoch": 0.03641379310344828, "grad_norm": 5.956013202667236, "learning_rate": 1.4553472987872107e-06, "loss": 1.3311, "step": 132 }, { "epoch": 0.03668965517241379, "grad_norm": 5.4459228515625, "learning_rate": 1.4663726571113563e-06, "loss": 1.1595, "step": 133 }, { "epoch": 0.03696551724137931, "grad_norm": 4.785271644592285, "learning_rate": 1.4773980154355017e-06, "loss": 1.2874, "step": 134 }, { "epoch": 0.037241379310344824, "grad_norm": 4.979898452758789, "learning_rate": 1.4884233737596473e-06, "loss": 1.2661, "step": 135 }, { "epoch": 0.03751724137931035, "grad_norm": 5.311439037322998, "learning_rate": 1.4994487320837927e-06, "loss": 1.2102, "step": 136 }, { "epoch": 0.03779310344827586, "grad_norm": 5.634222030639648, "learning_rate": 1.5104740904079385e-06, "loss": 1.3995, "step": 137 }, { "epoch": 0.03806896551724138, "grad_norm": 5.550137519836426, "learning_rate": 1.5214994487320839e-06, "loss": 1.3748, "step": 138 }, { "epoch": 0.038344827586206894, "grad_norm": 5.507777214050293, "learning_rate": 1.5325248070562297e-06, "loss": 1.3, "step": 139 }, { "epoch": 0.038620689655172416, "grad_norm": 5.375074863433838, "learning_rate": 1.543550165380375e-06, "loss": 1.224, "step": 140 }, { "epoch": 0.03889655172413793, "grad_norm": 4.827266216278076, "learning_rate": 1.5545755237045204e-06, "loss": 1.2577, "step": 141 }, { "epoch": 0.03917241379310345, "grad_norm": 5.957742214202881, "learning_rate": 1.565600882028666e-06, "loss": 1.5677, "step": 142 }, { "epoch": 0.03944827586206896, "grad_norm": 5.157381057739258, "learning_rate": 1.5766262403528114e-06, "loss": 1.2932, "step": 143 }, { "epoch": 0.039724137931034485, "grad_norm": 5.8236308097839355, "learning_rate": 1.5876515986769572e-06, "loss": 1.3254, "step": 144 }, { "epoch": 0.04, "grad_norm": 4.822021961212158, "learning_rate": 1.5986769570011026e-06, "loss": 1.3128, "step": 145 }, { "epoch": 0.040275862068965516, "grad_norm": 5.536880016326904, "learning_rate": 1.609702315325248e-06, "loss": 1.5358, "step": 146 }, { "epoch": 0.04055172413793103, "grad_norm": 5.692475318908691, "learning_rate": 1.6207276736493938e-06, "loss": 1.2458, "step": 147 }, { "epoch": 0.040827586206896554, "grad_norm": 5.26165771484375, "learning_rate": 1.6317530319735392e-06, "loss": 1.3987, "step": 148 }, { "epoch": 0.04110344827586207, "grad_norm": 5.778842926025391, "learning_rate": 1.6427783902976848e-06, "loss": 1.3872, "step": 149 }, { "epoch": 0.041379310344827586, "grad_norm": 5.265389919281006, "learning_rate": 1.6538037486218304e-06, "loss": 1.2679, "step": 150 }, { "epoch": 0.0416551724137931, "grad_norm": 5.532073020935059, "learning_rate": 1.664829106945976e-06, "loss": 1.4281, "step": 151 }, { "epoch": 0.041931034482758624, "grad_norm": 5.497222423553467, "learning_rate": 1.6758544652701214e-06, "loss": 1.3896, "step": 152 }, { "epoch": 0.04220689655172414, "grad_norm": 5.65834379196167, "learning_rate": 1.6868798235942667e-06, "loss": 1.3669, "step": 153 }, { "epoch": 0.042482758620689655, "grad_norm": 4.806946754455566, "learning_rate": 1.6979051819184126e-06, "loss": 1.1214, "step": 154 }, { "epoch": 0.04275862068965517, "grad_norm": 5.296016216278076, "learning_rate": 1.708930540242558e-06, "loss": 1.3182, "step": 155 }, { "epoch": 0.04303448275862069, "grad_norm": 5.259536266326904, "learning_rate": 1.7199558985667037e-06, "loss": 1.3642, "step": 156 }, { "epoch": 0.04331034482758621, "grad_norm": 5.125474452972412, "learning_rate": 1.7309812568908491e-06, "loss": 1.509, "step": 157 }, { "epoch": 0.043586206896551724, "grad_norm": 5.2925591468811035, "learning_rate": 1.7420066152149947e-06, "loss": 1.3576, "step": 158 }, { "epoch": 0.04386206896551724, "grad_norm": 4.815709590911865, "learning_rate": 1.75303197353914e-06, "loss": 1.166, "step": 159 }, { "epoch": 0.044137931034482755, "grad_norm": 5.321090221405029, "learning_rate": 1.7640573318632857e-06, "loss": 1.5407, "step": 160 }, { "epoch": 0.04441379310344828, "grad_norm": 4.980518341064453, "learning_rate": 1.7750826901874313e-06, "loss": 1.1857, "step": 161 }, { "epoch": 0.04468965517241379, "grad_norm": 5.172889709472656, "learning_rate": 1.7861080485115767e-06, "loss": 1.2698, "step": 162 }, { "epoch": 0.04496551724137931, "grad_norm": 5.092203617095947, "learning_rate": 1.7971334068357225e-06, "loss": 1.1351, "step": 163 }, { "epoch": 0.045241379310344824, "grad_norm": 4.930086135864258, "learning_rate": 1.8081587651598679e-06, "loss": 1.293, "step": 164 }, { "epoch": 0.04551724137931035, "grad_norm": 5.3338470458984375, "learning_rate": 1.8191841234840132e-06, "loss": 1.3699, "step": 165 }, { "epoch": 0.04579310344827586, "grad_norm": 5.286059856414795, "learning_rate": 1.8302094818081588e-06, "loss": 1.2024, "step": 166 }, { "epoch": 0.04606896551724138, "grad_norm": 5.920238971710205, "learning_rate": 1.8412348401323044e-06, "loss": 1.301, "step": 167 }, { "epoch": 0.046344827586206894, "grad_norm": 4.5586137771606445, "learning_rate": 1.85226019845645e-06, "loss": 1.4379, "step": 168 }, { "epoch": 0.046620689655172416, "grad_norm": 4.898438453674316, "learning_rate": 1.8632855567805954e-06, "loss": 1.2366, "step": 169 }, { "epoch": 0.04689655172413793, "grad_norm": 5.407320976257324, "learning_rate": 1.8743109151047412e-06, "loss": 1.33, "step": 170 }, { "epoch": 0.04717241379310345, "grad_norm": 5.0541462898254395, "learning_rate": 1.8853362734288866e-06, "loss": 1.2805, "step": 171 }, { "epoch": 0.04744827586206896, "grad_norm": 5.561731815338135, "learning_rate": 1.896361631753032e-06, "loss": 1.2031, "step": 172 }, { "epoch": 0.047724137931034485, "grad_norm": 4.950758457183838, "learning_rate": 1.9073869900771776e-06, "loss": 1.1362, "step": 173 }, { "epoch": 0.048, "grad_norm": 5.171731948852539, "learning_rate": 1.918412348401323e-06, "loss": 1.3722, "step": 174 }, { "epoch": 0.04827586206896552, "grad_norm": 5.350513458251953, "learning_rate": 1.9294377067254688e-06, "loss": 1.1389, "step": 175 }, { "epoch": 0.04855172413793103, "grad_norm": 5.304233551025391, "learning_rate": 1.940463065049614e-06, "loss": 1.2858, "step": 176 }, { "epoch": 0.048827586206896555, "grad_norm": 5.48848819732666, "learning_rate": 1.95148842337376e-06, "loss": 1.3456, "step": 177 }, { "epoch": 0.04910344827586207, "grad_norm": 5.357940196990967, "learning_rate": 1.9625137816979054e-06, "loss": 1.252, "step": 178 }, { "epoch": 0.049379310344827586, "grad_norm": 5.13185453414917, "learning_rate": 1.9735391400220507e-06, "loss": 1.3197, "step": 179 }, { "epoch": 0.0496551724137931, "grad_norm": 5.698286533355713, "learning_rate": 1.9845644983461965e-06, "loss": 1.4125, "step": 180 }, { "epoch": 0.049931034482758624, "grad_norm": 4.905097484588623, "learning_rate": 1.995589856670342e-06, "loss": 1.1472, "step": 181 }, { "epoch": 0.05020689655172414, "grad_norm": 5.259552001953125, "learning_rate": 2.0066152149944877e-06, "loss": 1.3079, "step": 182 }, { "epoch": 0.050482758620689655, "grad_norm": 6.10539436340332, "learning_rate": 2.017640573318633e-06, "loss": 1.3521, "step": 183 }, { "epoch": 0.05075862068965517, "grad_norm": 5.290539741516113, "learning_rate": 2.0286659316427785e-06, "loss": 1.3089, "step": 184 }, { "epoch": 0.05103448275862069, "grad_norm": 4.908003807067871, "learning_rate": 2.0396912899669243e-06, "loss": 1.3602, "step": 185 }, { "epoch": 0.05131034482758621, "grad_norm": 4.882885932922363, "learning_rate": 2.0507166482910697e-06, "loss": 1.1802, "step": 186 }, { "epoch": 0.051586206896551724, "grad_norm": 5.072561264038086, "learning_rate": 2.061742006615215e-06, "loss": 1.3781, "step": 187 }, { "epoch": 0.05186206896551724, "grad_norm": 4.707251071929932, "learning_rate": 2.0727673649393605e-06, "loss": 1.3042, "step": 188 }, { "epoch": 0.052137931034482755, "grad_norm": 4.44943904876709, "learning_rate": 2.0837927232635063e-06, "loss": 1.352, "step": 189 }, { "epoch": 0.05241379310344828, "grad_norm": 5.196096420288086, "learning_rate": 2.0948180815876516e-06, "loss": 1.2954, "step": 190 }, { "epoch": 0.052689655172413793, "grad_norm": 5.206119537353516, "learning_rate": 2.105843439911797e-06, "loss": 1.2294, "step": 191 }, { "epoch": 0.05296551724137931, "grad_norm": 5.072884559631348, "learning_rate": 2.116868798235943e-06, "loss": 1.1875, "step": 192 }, { "epoch": 0.053241379310344825, "grad_norm": 4.748642921447754, "learning_rate": 2.1278941565600882e-06, "loss": 1.4328, "step": 193 }, { "epoch": 0.05351724137931035, "grad_norm": 4.740292549133301, "learning_rate": 2.138919514884234e-06, "loss": 1.31, "step": 194 }, { "epoch": 0.05379310344827586, "grad_norm": 5.764307022094727, "learning_rate": 2.1499448732083794e-06, "loss": 1.3292, "step": 195 }, { "epoch": 0.05406896551724138, "grad_norm": 5.619941234588623, "learning_rate": 2.1609702315325252e-06, "loss": 1.4759, "step": 196 }, { "epoch": 0.054344827586206894, "grad_norm": 5.305754661560059, "learning_rate": 2.1719955898566706e-06, "loss": 1.366, "step": 197 }, { "epoch": 0.054620689655172416, "grad_norm": 5.718596935272217, "learning_rate": 2.183020948180816e-06, "loss": 1.2181, "step": 198 }, { "epoch": 0.05489655172413793, "grad_norm": 5.417957782745361, "learning_rate": 2.194046306504962e-06, "loss": 1.2076, "step": 199 }, { "epoch": 0.05517241379310345, "grad_norm": 4.929399490356445, "learning_rate": 2.205071664829107e-06, "loss": 1.0614, "step": 200 }, { "epoch": 0.05544827586206896, "grad_norm": 4.594115734100342, "learning_rate": 2.2160970231532526e-06, "loss": 1.2828, "step": 201 }, { "epoch": 0.055724137931034486, "grad_norm": 4.887152194976807, "learning_rate": 2.2271223814773984e-06, "loss": 1.1993, "step": 202 }, { "epoch": 0.056, "grad_norm": 5.430395126342773, "learning_rate": 2.2381477398015437e-06, "loss": 1.3763, "step": 203 }, { "epoch": 0.05627586206896552, "grad_norm": 4.999973297119141, "learning_rate": 2.249173098125689e-06, "loss": 1.2881, "step": 204 }, { "epoch": 0.05655172413793103, "grad_norm": 5.115943908691406, "learning_rate": 2.2601984564498345e-06, "loss": 1.1878, "step": 205 }, { "epoch": 0.056827586206896555, "grad_norm": 5.244913101196289, "learning_rate": 2.2712238147739803e-06, "loss": 1.3582, "step": 206 }, { "epoch": 0.05710344827586207, "grad_norm": 5.233243465423584, "learning_rate": 2.2822491730981257e-06, "loss": 1.3388, "step": 207 }, { "epoch": 0.057379310344827586, "grad_norm": 4.940476894378662, "learning_rate": 2.2932745314222715e-06, "loss": 1.286, "step": 208 }, { "epoch": 0.0576551724137931, "grad_norm": 5.942206382751465, "learning_rate": 2.304299889746417e-06, "loss": 1.2228, "step": 209 }, { "epoch": 0.057931034482758624, "grad_norm": 5.168428897857666, "learning_rate": 2.3153252480705623e-06, "loss": 1.2872, "step": 210 }, { "epoch": 0.05820689655172414, "grad_norm": 5.429690361022949, "learning_rate": 2.326350606394708e-06, "loss": 1.1666, "step": 211 }, { "epoch": 0.058482758620689655, "grad_norm": 5.889111518859863, "learning_rate": 2.3373759647188535e-06, "loss": 1.3651, "step": 212 }, { "epoch": 0.05875862068965517, "grad_norm": 4.902268409729004, "learning_rate": 2.3484013230429993e-06, "loss": 1.2585, "step": 213 }, { "epoch": 0.059034482758620686, "grad_norm": 5.136106967926025, "learning_rate": 2.3594266813671447e-06, "loss": 1.3185, "step": 214 }, { "epoch": 0.05931034482758621, "grad_norm": 5.093679904937744, "learning_rate": 2.3704520396912905e-06, "loss": 1.374, "step": 215 }, { "epoch": 0.059586206896551724, "grad_norm": 5.2938551902771, "learning_rate": 2.381477398015436e-06, "loss": 1.204, "step": 216 }, { "epoch": 0.05986206896551724, "grad_norm": 5.493772983551025, "learning_rate": 2.3925027563395812e-06, "loss": 1.3458, "step": 217 }, { "epoch": 0.060137931034482756, "grad_norm": 5.408130168914795, "learning_rate": 2.4035281146637266e-06, "loss": 1.2224, "step": 218 }, { "epoch": 0.06041379310344828, "grad_norm": 5.084261417388916, "learning_rate": 2.4145534729878724e-06, "loss": 1.2004, "step": 219 }, { "epoch": 0.060689655172413794, "grad_norm": 5.135919094085693, "learning_rate": 2.425578831312018e-06, "loss": 1.0607, "step": 220 }, { "epoch": 0.06096551724137931, "grad_norm": 5.719693660736084, "learning_rate": 2.436604189636163e-06, "loss": 1.3123, "step": 221 }, { "epoch": 0.061241379310344825, "grad_norm": 5.055806636810303, "learning_rate": 2.4476295479603086e-06, "loss": 1.2185, "step": 222 }, { "epoch": 0.06151724137931035, "grad_norm": 5.632617473602295, "learning_rate": 2.4586549062844544e-06, "loss": 1.5052, "step": 223 }, { "epoch": 0.06179310344827586, "grad_norm": 5.644922733306885, "learning_rate": 2.4696802646085998e-06, "loss": 1.2717, "step": 224 }, { "epoch": 0.06206896551724138, "grad_norm": 5.598965644836426, "learning_rate": 2.4807056229327456e-06, "loss": 1.5155, "step": 225 }, { "epoch": 0.062344827586206894, "grad_norm": 5.31247091293335, "learning_rate": 2.491730981256891e-06, "loss": 1.1933, "step": 226 }, { "epoch": 0.06262068965517241, "grad_norm": 5.2328290939331055, "learning_rate": 2.5027563395810368e-06, "loss": 1.2109, "step": 227 }, { "epoch": 0.06289655172413793, "grad_norm": 5.614056587219238, "learning_rate": 2.513781697905182e-06, "loss": 1.2393, "step": 228 }, { "epoch": 0.06317241379310345, "grad_norm": 4.781396865844727, "learning_rate": 2.5248070562293275e-06, "loss": 1.1812, "step": 229 }, { "epoch": 0.06344827586206897, "grad_norm": 5.217669486999512, "learning_rate": 2.535832414553473e-06, "loss": 1.3692, "step": 230 }, { "epoch": 0.06372413793103449, "grad_norm": 5.420798301696777, "learning_rate": 2.5468577728776187e-06, "loss": 1.448, "step": 231 }, { "epoch": 0.064, "grad_norm": 4.977224826812744, "learning_rate": 2.5578831312017645e-06, "loss": 1.1776, "step": 232 }, { "epoch": 0.06427586206896552, "grad_norm": 5.152836322784424, "learning_rate": 2.56890848952591e-06, "loss": 1.2791, "step": 233 }, { "epoch": 0.06455172413793103, "grad_norm": 5.3824896812438965, "learning_rate": 2.5799338478500553e-06, "loss": 1.2831, "step": 234 }, { "epoch": 0.06482758620689655, "grad_norm": 5.2175750732421875, "learning_rate": 2.5909592061742007e-06, "loss": 1.1764, "step": 235 }, { "epoch": 0.06510344827586206, "grad_norm": 4.974033832550049, "learning_rate": 2.6019845644983465e-06, "loss": 1.2455, "step": 236 }, { "epoch": 0.0653793103448276, "grad_norm": 4.945578575134277, "learning_rate": 2.613009922822492e-06, "loss": 1.307, "step": 237 }, { "epoch": 0.06565517241379311, "grad_norm": 5.325129508972168, "learning_rate": 2.6240352811466373e-06, "loss": 1.3046, "step": 238 }, { "epoch": 0.06593103448275862, "grad_norm": 5.37120246887207, "learning_rate": 2.6350606394707826e-06, "loss": 1.4706, "step": 239 }, { "epoch": 0.06620689655172414, "grad_norm": 4.816856384277344, "learning_rate": 2.646085997794929e-06, "loss": 1.2078, "step": 240 }, { "epoch": 0.06648275862068966, "grad_norm": 5.822649955749512, "learning_rate": 2.6571113561190742e-06, "loss": 1.3151, "step": 241 }, { "epoch": 0.06675862068965517, "grad_norm": 4.978457927703857, "learning_rate": 2.6681367144432196e-06, "loss": 1.259, "step": 242 }, { "epoch": 0.06703448275862069, "grad_norm": 5.133908271789551, "learning_rate": 2.679162072767365e-06, "loss": 1.1116, "step": 243 }, { "epoch": 0.0673103448275862, "grad_norm": 4.368741989135742, "learning_rate": 2.6901874310915104e-06, "loss": 1.1239, "step": 244 }, { "epoch": 0.06758620689655172, "grad_norm": 5.236698150634766, "learning_rate": 2.7012127894156566e-06, "loss": 1.2705, "step": 245 }, { "epoch": 0.06786206896551725, "grad_norm": 5.547703742980957, "learning_rate": 2.712238147739802e-06, "loss": 1.1519, "step": 246 }, { "epoch": 0.06813793103448276, "grad_norm": 4.526394844055176, "learning_rate": 2.7232635060639474e-06, "loss": 1.1515, "step": 247 }, { "epoch": 0.06841379310344828, "grad_norm": 5.369807720184326, "learning_rate": 2.7342888643880928e-06, "loss": 1.2818, "step": 248 }, { "epoch": 0.0686896551724138, "grad_norm": 5.205616474151611, "learning_rate": 2.745314222712238e-06, "loss": 1.261, "step": 249 }, { "epoch": 0.06896551724137931, "grad_norm": 5.029371738433838, "learning_rate": 2.756339581036384e-06, "loss": 1.4541, "step": 250 }, { "epoch": 0.06924137931034482, "grad_norm": 5.806656837463379, "learning_rate": 2.7673649393605294e-06, "loss": 1.3115, "step": 251 }, { "epoch": 0.06951724137931034, "grad_norm": 5.128088474273682, "learning_rate": 2.7783902976846747e-06, "loss": 1.3186, "step": 252 }, { "epoch": 0.06979310344827586, "grad_norm": 5.238038063049316, "learning_rate": 2.7894156560088205e-06, "loss": 1.267, "step": 253 }, { "epoch": 0.07006896551724139, "grad_norm": 5.919521808624268, "learning_rate": 2.800441014332966e-06, "loss": 1.2413, "step": 254 }, { "epoch": 0.0703448275862069, "grad_norm": 4.9508538246154785, "learning_rate": 2.8114663726571117e-06, "loss": 1.1928, "step": 255 }, { "epoch": 0.07062068965517242, "grad_norm": 4.85231876373291, "learning_rate": 2.822491730981257e-06, "loss": 1.2798, "step": 256 }, { "epoch": 0.07089655172413793, "grad_norm": 4.957312107086182, "learning_rate": 2.8335170893054025e-06, "loss": 1.1892, "step": 257 }, { "epoch": 0.07117241379310345, "grad_norm": 5.011900424957275, "learning_rate": 2.844542447629548e-06, "loss": 1.2543, "step": 258 }, { "epoch": 0.07144827586206896, "grad_norm": 5.402774810791016, "learning_rate": 2.855567805953694e-06, "loss": 1.3159, "step": 259 }, { "epoch": 0.07172413793103448, "grad_norm": 4.842413425445557, "learning_rate": 2.8665931642778395e-06, "loss": 1.3377, "step": 260 }, { "epoch": 0.072, "grad_norm": 5.116781711578369, "learning_rate": 2.877618522601985e-06, "loss": 1.1437, "step": 261 }, { "epoch": 0.07227586206896552, "grad_norm": 5.292843818664551, "learning_rate": 2.8886438809261303e-06, "loss": 1.1429, "step": 262 }, { "epoch": 0.07255172413793104, "grad_norm": 5.109421253204346, "learning_rate": 2.8996692392502756e-06, "loss": 1.2943, "step": 263 }, { "epoch": 0.07282758620689656, "grad_norm": 5.700744152069092, "learning_rate": 2.9106945975744215e-06, "loss": 1.3817, "step": 264 }, { "epoch": 0.07310344827586207, "grad_norm": 5.299622535705566, "learning_rate": 2.9217199558985673e-06, "loss": 1.2417, "step": 265 }, { "epoch": 0.07337931034482759, "grad_norm": 5.484564304351807, "learning_rate": 2.9327453142227126e-06, "loss": 1.369, "step": 266 }, { "epoch": 0.0736551724137931, "grad_norm": 5.2038798332214355, "learning_rate": 2.943770672546858e-06, "loss": 1.2916, "step": 267 }, { "epoch": 0.07393103448275862, "grad_norm": 5.5229811668396, "learning_rate": 2.9547960308710034e-06, "loss": 1.3591, "step": 268 }, { "epoch": 0.07420689655172413, "grad_norm": 5.40981388092041, "learning_rate": 2.9658213891951492e-06, "loss": 1.3081, "step": 269 }, { "epoch": 0.07448275862068965, "grad_norm": 4.89354944229126, "learning_rate": 2.9768467475192946e-06, "loss": 1.1246, "step": 270 }, { "epoch": 0.07475862068965518, "grad_norm": 4.844183921813965, "learning_rate": 2.98787210584344e-06, "loss": 1.3287, "step": 271 }, { "epoch": 0.0750344827586207, "grad_norm": 5.228158950805664, "learning_rate": 2.9988974641675854e-06, "loss": 1.2271, "step": 272 }, { "epoch": 0.07531034482758621, "grad_norm": 4.679659366607666, "learning_rate": 3.0099228224917308e-06, "loss": 1.2061, "step": 273 }, { "epoch": 0.07558620689655172, "grad_norm": 5.699970245361328, "learning_rate": 3.020948180815877e-06, "loss": 1.4316, "step": 274 }, { "epoch": 0.07586206896551724, "grad_norm": 5.2897467613220215, "learning_rate": 3.0319735391400224e-06, "loss": 1.2323, "step": 275 }, { "epoch": 0.07613793103448276, "grad_norm": 5.237679481506348, "learning_rate": 3.0429988974641677e-06, "loss": 1.3433, "step": 276 }, { "epoch": 0.07641379310344827, "grad_norm": 5.119305610656738, "learning_rate": 3.054024255788313e-06, "loss": 1.1525, "step": 277 }, { "epoch": 0.07668965517241379, "grad_norm": 5.0122389793396, "learning_rate": 3.0650496141124594e-06, "loss": 1.3753, "step": 278 }, { "epoch": 0.07696551724137932, "grad_norm": 5.528941631317139, "learning_rate": 3.0760749724366047e-06, "loss": 1.2241, "step": 279 }, { "epoch": 0.07724137931034483, "grad_norm": 5.1954498291015625, "learning_rate": 3.08710033076075e-06, "loss": 1.3011, "step": 280 }, { "epoch": 0.07751724137931035, "grad_norm": 5.713561058044434, "learning_rate": 3.0981256890848955e-06, "loss": 1.2628, "step": 281 }, { "epoch": 0.07779310344827586, "grad_norm": 4.52591609954834, "learning_rate": 3.109151047409041e-06, "loss": 1.1616, "step": 282 }, { "epoch": 0.07806896551724138, "grad_norm": 5.1714606285095215, "learning_rate": 3.1201764057331867e-06, "loss": 1.3231, "step": 283 }, { "epoch": 0.0783448275862069, "grad_norm": 5.263928413391113, "learning_rate": 3.131201764057332e-06, "loss": 1.3078, "step": 284 }, { "epoch": 0.07862068965517241, "grad_norm": 4.929699897766113, "learning_rate": 3.1422271223814775e-06, "loss": 1.1951, "step": 285 }, { "epoch": 0.07889655172413793, "grad_norm": 5.158076286315918, "learning_rate": 3.153252480705623e-06, "loss": 1.4389, "step": 286 }, { "epoch": 0.07917241379310345, "grad_norm": 5.028899192810059, "learning_rate": 3.1642778390297687e-06, "loss": 1.3466, "step": 287 }, { "epoch": 0.07944827586206897, "grad_norm": 5.092339992523193, "learning_rate": 3.1753031973539145e-06, "loss": 1.333, "step": 288 }, { "epoch": 0.07972413793103449, "grad_norm": 4.9750800132751465, "learning_rate": 3.18632855567806e-06, "loss": 1.3578, "step": 289 }, { "epoch": 0.08, "grad_norm": 5.569305419921875, "learning_rate": 3.1973539140022052e-06, "loss": 1.4354, "step": 290 }, { "epoch": 0.08027586206896552, "grad_norm": 5.080304145812988, "learning_rate": 3.2083792723263506e-06, "loss": 1.3058, "step": 291 }, { "epoch": 0.08055172413793103, "grad_norm": 5.949770927429199, "learning_rate": 3.219404630650496e-06, "loss": 1.2221, "step": 292 }, { "epoch": 0.08082758620689655, "grad_norm": 5.082002639770508, "learning_rate": 3.2304299889746422e-06, "loss": 1.2044, "step": 293 }, { "epoch": 0.08110344827586206, "grad_norm": 5.432873249053955, "learning_rate": 3.2414553472987876e-06, "loss": 1.3248, "step": 294 }, { "epoch": 0.08137931034482758, "grad_norm": 5.247768878936768, "learning_rate": 3.252480705622933e-06, "loss": 1.2929, "step": 295 }, { "epoch": 0.08165517241379311, "grad_norm": 4.822509765625, "learning_rate": 3.2635060639470784e-06, "loss": 1.1174, "step": 296 }, { "epoch": 0.08193103448275862, "grad_norm": 5.666838645935059, "learning_rate": 3.274531422271224e-06, "loss": 1.2775, "step": 297 }, { "epoch": 0.08220689655172414, "grad_norm": 5.007264614105225, "learning_rate": 3.2855567805953696e-06, "loss": 1.2963, "step": 298 }, { "epoch": 0.08248275862068966, "grad_norm": 5.183523654937744, "learning_rate": 3.2965821389195154e-06, "loss": 1.1687, "step": 299 }, { "epoch": 0.08275862068965517, "grad_norm": 5.227467060089111, "learning_rate": 3.3076074972436608e-06, "loss": 1.3059, "step": 300 }, { "epoch": 0.08303448275862069, "grad_norm": 4.819178581237793, "learning_rate": 3.318632855567806e-06, "loss": 1.2972, "step": 301 }, { "epoch": 0.0833103448275862, "grad_norm": 4.732048034667969, "learning_rate": 3.329658213891952e-06, "loss": 1.1657, "step": 302 }, { "epoch": 0.08358620689655172, "grad_norm": 5.372971057891846, "learning_rate": 3.3406835722160973e-06, "loss": 1.2835, "step": 303 }, { "epoch": 0.08386206896551725, "grad_norm": 4.683228969573975, "learning_rate": 3.3517089305402427e-06, "loss": 1.1025, "step": 304 }, { "epoch": 0.08413793103448276, "grad_norm": 4.760290145874023, "learning_rate": 3.362734288864388e-06, "loss": 1.1189, "step": 305 }, { "epoch": 0.08441379310344828, "grad_norm": 5.272533893585205, "learning_rate": 3.3737596471885335e-06, "loss": 1.4469, "step": 306 }, { "epoch": 0.0846896551724138, "grad_norm": 4.760037422180176, "learning_rate": 3.3847850055126797e-06, "loss": 1.0842, "step": 307 }, { "epoch": 0.08496551724137931, "grad_norm": 4.8198981285095215, "learning_rate": 3.395810363836825e-06, "loss": 1.2398, "step": 308 }, { "epoch": 0.08524137931034483, "grad_norm": 5.2929582595825195, "learning_rate": 3.4068357221609705e-06, "loss": 1.0819, "step": 309 }, { "epoch": 0.08551724137931034, "grad_norm": 4.984736919403076, "learning_rate": 3.417861080485116e-06, "loss": 1.2166, "step": 310 }, { "epoch": 0.08579310344827586, "grad_norm": 5.33635139465332, "learning_rate": 3.4288864388092613e-06, "loss": 1.3305, "step": 311 }, { "epoch": 0.08606896551724139, "grad_norm": 5.46952486038208, "learning_rate": 3.4399117971334075e-06, "loss": 1.291, "step": 312 }, { "epoch": 0.0863448275862069, "grad_norm": 5.990787029266357, "learning_rate": 3.450937155457553e-06, "loss": 1.2616, "step": 313 }, { "epoch": 0.08662068965517242, "grad_norm": 5.733656406402588, "learning_rate": 3.4619625137816982e-06, "loss": 1.2332, "step": 314 }, { "epoch": 0.08689655172413793, "grad_norm": 5.298192977905273, "learning_rate": 3.4729878721058436e-06, "loss": 1.315, "step": 315 }, { "epoch": 0.08717241379310345, "grad_norm": 4.7446722984313965, "learning_rate": 3.4840132304299894e-06, "loss": 1.2777, "step": 316 }, { "epoch": 0.08744827586206896, "grad_norm": 5.606657981872559, "learning_rate": 3.495038588754135e-06, "loss": 1.393, "step": 317 }, { "epoch": 0.08772413793103448, "grad_norm": 4.965888977050781, "learning_rate": 3.50606394707828e-06, "loss": 1.2347, "step": 318 }, { "epoch": 0.088, "grad_norm": 4.797236919403076, "learning_rate": 3.5170893054024256e-06, "loss": 1.3533, "step": 319 }, { "epoch": 0.08827586206896551, "grad_norm": 5.185770034790039, "learning_rate": 3.5281146637265714e-06, "loss": 1.1653, "step": 320 }, { "epoch": 0.08855172413793104, "grad_norm": 5.201143264770508, "learning_rate": 3.539140022050717e-06, "loss": 1.257, "step": 321 }, { "epoch": 0.08882758620689656, "grad_norm": 4.531820297241211, "learning_rate": 3.5501653803748626e-06, "loss": 1.2036, "step": 322 }, { "epoch": 0.08910344827586207, "grad_norm": 5.220597743988037, "learning_rate": 3.561190738699008e-06, "loss": 1.2467, "step": 323 }, { "epoch": 0.08937931034482759, "grad_norm": 4.537539005279541, "learning_rate": 3.5722160970231534e-06, "loss": 1.217, "step": 324 }, { "epoch": 0.0896551724137931, "grad_norm": 4.958594799041748, "learning_rate": 3.5832414553472987e-06, "loss": 1.3481, "step": 325 }, { "epoch": 0.08993103448275862, "grad_norm": 5.631712913513184, "learning_rate": 3.594266813671445e-06, "loss": 1.257, "step": 326 }, { "epoch": 0.09020689655172413, "grad_norm": 5.424651145935059, "learning_rate": 3.6052921719955903e-06, "loss": 1.2961, "step": 327 }, { "epoch": 0.09048275862068965, "grad_norm": 5.801600456237793, "learning_rate": 3.6163175303197357e-06, "loss": 1.391, "step": 328 }, { "epoch": 0.09075862068965518, "grad_norm": 5.454166889190674, "learning_rate": 3.627342888643881e-06, "loss": 1.2567, "step": 329 }, { "epoch": 0.0910344827586207, "grad_norm": 5.588116645812988, "learning_rate": 3.6383682469680265e-06, "loss": 1.0798, "step": 330 }, { "epoch": 0.09131034482758621, "grad_norm": 5.58540153503418, "learning_rate": 3.6493936052921723e-06, "loss": 1.188, "step": 331 }, { "epoch": 0.09158620689655173, "grad_norm": 4.968232154846191, "learning_rate": 3.6604189636163177e-06, "loss": 1.2978, "step": 332 }, { "epoch": 0.09186206896551724, "grad_norm": 4.9044294357299805, "learning_rate": 3.6714443219404635e-06, "loss": 1.3001, "step": 333 }, { "epoch": 0.09213793103448276, "grad_norm": 4.855127334594727, "learning_rate": 3.682469680264609e-06, "loss": 1.2736, "step": 334 }, { "epoch": 0.09241379310344827, "grad_norm": 5.121159553527832, "learning_rate": 3.6934950385887547e-06, "loss": 1.2394, "step": 335 }, { "epoch": 0.09268965517241379, "grad_norm": 4.954476356506348, "learning_rate": 3.7045203969129e-06, "loss": 1.2361, "step": 336 }, { "epoch": 0.09296551724137932, "grad_norm": 5.245781421661377, "learning_rate": 3.7155457552370455e-06, "loss": 1.323, "step": 337 }, { "epoch": 0.09324137931034483, "grad_norm": 5.231593132019043, "learning_rate": 3.726571113561191e-06, "loss": 1.337, "step": 338 }, { "epoch": 0.09351724137931035, "grad_norm": 5.350870132446289, "learning_rate": 3.7375964718853362e-06, "loss": 1.3885, "step": 339 }, { "epoch": 0.09379310344827586, "grad_norm": 5.2299323081970215, "learning_rate": 3.7486218302094825e-06, "loss": 1.3929, "step": 340 }, { "epoch": 0.09406896551724138, "grad_norm": 5.521791934967041, "learning_rate": 3.759647188533628e-06, "loss": 1.2541, "step": 341 }, { "epoch": 0.0943448275862069, "grad_norm": 6.184092044830322, "learning_rate": 3.7706725468577732e-06, "loss": 1.319, "step": 342 }, { "epoch": 0.09462068965517241, "grad_norm": 5.272164821624756, "learning_rate": 3.7816979051819186e-06, "loss": 1.2217, "step": 343 }, { "epoch": 0.09489655172413793, "grad_norm": 4.605432987213135, "learning_rate": 3.792723263506064e-06, "loss": 1.2641, "step": 344 }, { "epoch": 0.09517241379310344, "grad_norm": 5.007249355316162, "learning_rate": 3.80374862183021e-06, "loss": 1.3531, "step": 345 }, { "epoch": 0.09544827586206897, "grad_norm": 5.286783218383789, "learning_rate": 3.814773980154355e-06, "loss": 1.2624, "step": 346 }, { "epoch": 0.09572413793103449, "grad_norm": 5.344627380371094, "learning_rate": 3.8257993384785006e-06, "loss": 1.3482, "step": 347 }, { "epoch": 0.096, "grad_norm": 4.894829273223877, "learning_rate": 3.836824696802646e-06, "loss": 1.2511, "step": 348 }, { "epoch": 0.09627586206896552, "grad_norm": 4.780425071716309, "learning_rate": 3.847850055126791e-06, "loss": 1.2317, "step": 349 }, { "epoch": 0.09655172413793103, "grad_norm": 5.112002849578857, "learning_rate": 3.8588754134509376e-06, "loss": 1.2706, "step": 350 }, { "epoch": 0.09682758620689655, "grad_norm": 4.943282604217529, "learning_rate": 3.869900771775083e-06, "loss": 1.2581, "step": 351 }, { "epoch": 0.09710344827586206, "grad_norm": 4.508307933807373, "learning_rate": 3.880926130099228e-06, "loss": 1.0653, "step": 352 }, { "epoch": 0.09737931034482758, "grad_norm": 4.885964870452881, "learning_rate": 3.891951488423374e-06, "loss": 1.3643, "step": 353 }, { "epoch": 0.09765517241379311, "grad_norm": 5.54771089553833, "learning_rate": 3.90297684674752e-06, "loss": 1.2621, "step": 354 }, { "epoch": 0.09793103448275862, "grad_norm": 5.732326030731201, "learning_rate": 3.914002205071665e-06, "loss": 1.413, "step": 355 }, { "epoch": 0.09820689655172414, "grad_norm": 4.590935230255127, "learning_rate": 3.925027563395811e-06, "loss": 1.1071, "step": 356 }, { "epoch": 0.09848275862068966, "grad_norm": 4.9477858543396, "learning_rate": 3.936052921719956e-06, "loss": 1.1803, "step": 357 }, { "epoch": 0.09875862068965517, "grad_norm": 4.945376396179199, "learning_rate": 3.9470782800441015e-06, "loss": 1.1767, "step": 358 }, { "epoch": 0.09903448275862069, "grad_norm": 5.244919776916504, "learning_rate": 3.958103638368248e-06, "loss": 1.2886, "step": 359 }, { "epoch": 0.0993103448275862, "grad_norm": 4.799619197845459, "learning_rate": 3.969128996692393e-06, "loss": 1.3795, "step": 360 }, { "epoch": 0.09958620689655172, "grad_norm": 4.682539463043213, "learning_rate": 3.9801543550165385e-06, "loss": 1.1856, "step": 361 }, { "epoch": 0.09986206896551725, "grad_norm": 5.159238815307617, "learning_rate": 3.991179713340684e-06, "loss": 1.338, "step": 362 }, { "epoch": 0.10013793103448276, "grad_norm": 5.173552989959717, "learning_rate": 4.002205071664829e-06, "loss": 1.2372, "step": 363 }, { "epoch": 0.10041379310344828, "grad_norm": 5.028811931610107, "learning_rate": 4.0132304299889755e-06, "loss": 1.1549, "step": 364 }, { "epoch": 0.1006896551724138, "grad_norm": 4.875896453857422, "learning_rate": 4.024255788313121e-06, "loss": 1.1818, "step": 365 }, { "epoch": 0.10096551724137931, "grad_norm": 5.157668590545654, "learning_rate": 4.035281146637266e-06, "loss": 1.2158, "step": 366 }, { "epoch": 0.10124137931034483, "grad_norm": 5.048682689666748, "learning_rate": 4.046306504961412e-06, "loss": 1.1608, "step": 367 }, { "epoch": 0.10151724137931034, "grad_norm": 4.598012447357178, "learning_rate": 4.057331863285557e-06, "loss": 1.234, "step": 368 }, { "epoch": 0.10179310344827586, "grad_norm": 4.805130481719971, "learning_rate": 4.068357221609703e-06, "loss": 1.1785, "step": 369 }, { "epoch": 0.10206896551724139, "grad_norm": 4.841047763824463, "learning_rate": 4.079382579933849e-06, "loss": 1.1167, "step": 370 }, { "epoch": 0.1023448275862069, "grad_norm": 5.087856769561768, "learning_rate": 4.090407938257994e-06, "loss": 1.136, "step": 371 }, { "epoch": 0.10262068965517242, "grad_norm": 5.1338934898376465, "learning_rate": 4.101433296582139e-06, "loss": 1.2212, "step": 372 }, { "epoch": 0.10289655172413793, "grad_norm": 5.208235263824463, "learning_rate": 4.112458654906285e-06, "loss": 1.2068, "step": 373 }, { "epoch": 0.10317241379310345, "grad_norm": 5.0124359130859375, "learning_rate": 4.12348401323043e-06, "loss": 1.2393, "step": 374 }, { "epoch": 0.10344827586206896, "grad_norm": 5.02834939956665, "learning_rate": 4.1345093715545755e-06, "loss": 1.2962, "step": 375 }, { "epoch": 0.10372413793103448, "grad_norm": 5.413354873657227, "learning_rate": 4.145534729878721e-06, "loss": 1.187, "step": 376 }, { "epoch": 0.104, "grad_norm": 4.966907024383545, "learning_rate": 4.156560088202866e-06, "loss": 1.379, "step": 377 }, { "epoch": 0.10427586206896551, "grad_norm": 4.817436218261719, "learning_rate": 4.1675854465270125e-06, "loss": 1.2478, "step": 378 }, { "epoch": 0.10455172413793104, "grad_norm": 4.856457233428955, "learning_rate": 4.178610804851158e-06, "loss": 1.2737, "step": 379 }, { "epoch": 0.10482758620689656, "grad_norm": 4.881834506988525, "learning_rate": 4.189636163175303e-06, "loss": 1.4044, "step": 380 }, { "epoch": 0.10510344827586207, "grad_norm": 4.449133396148682, "learning_rate": 4.200661521499449e-06, "loss": 1.1379, "step": 381 }, { "epoch": 0.10537931034482759, "grad_norm": 4.482323169708252, "learning_rate": 4.211686879823594e-06, "loss": 1.2168, "step": 382 }, { "epoch": 0.1056551724137931, "grad_norm": 4.864123821258545, "learning_rate": 4.22271223814774e-06, "loss": 1.2661, "step": 383 }, { "epoch": 0.10593103448275862, "grad_norm": 5.493534088134766, "learning_rate": 4.233737596471886e-06, "loss": 1.192, "step": 384 }, { "epoch": 0.10620689655172413, "grad_norm": 5.449899673461914, "learning_rate": 4.244762954796031e-06, "loss": 1.2232, "step": 385 }, { "epoch": 0.10648275862068965, "grad_norm": 5.509608268737793, "learning_rate": 4.2557883131201764e-06, "loss": 1.3118, "step": 386 }, { "epoch": 0.10675862068965518, "grad_norm": 4.935935974121094, "learning_rate": 4.266813671444322e-06, "loss": 1.2033, "step": 387 }, { "epoch": 0.1070344827586207, "grad_norm": 5.379591464996338, "learning_rate": 4.277839029768468e-06, "loss": 1.2897, "step": 388 }, { "epoch": 0.10731034482758621, "grad_norm": 4.95690393447876, "learning_rate": 4.2888643880926134e-06, "loss": 1.2388, "step": 389 }, { "epoch": 0.10758620689655173, "grad_norm": 5.065435409545898, "learning_rate": 4.299889746416759e-06, "loss": 1.271, "step": 390 }, { "epoch": 0.10786206896551724, "grad_norm": 5.7695136070251465, "learning_rate": 4.310915104740904e-06, "loss": 1.2785, "step": 391 }, { "epoch": 0.10813793103448276, "grad_norm": 5.116886615753174, "learning_rate": 4.3219404630650504e-06, "loss": 1.4007, "step": 392 }, { "epoch": 0.10841379310344827, "grad_norm": 5.032015323638916, "learning_rate": 4.332965821389196e-06, "loss": 1.2583, "step": 393 }, { "epoch": 0.10868965517241379, "grad_norm": 4.542198181152344, "learning_rate": 4.343991179713341e-06, "loss": 1.3115, "step": 394 }, { "epoch": 0.10896551724137932, "grad_norm": 4.57611608505249, "learning_rate": 4.355016538037487e-06, "loss": 1.2577, "step": 395 }, { "epoch": 0.10924137931034483, "grad_norm": 4.712037563323975, "learning_rate": 4.366041896361632e-06, "loss": 1.2949, "step": 396 }, { "epoch": 0.10951724137931035, "grad_norm": 5.009523868560791, "learning_rate": 4.377067254685778e-06, "loss": 1.2302, "step": 397 }, { "epoch": 0.10979310344827586, "grad_norm": 5.209190845489502, "learning_rate": 4.388092613009924e-06, "loss": 1.1515, "step": 398 }, { "epoch": 0.11006896551724138, "grad_norm": 5.029942035675049, "learning_rate": 4.399117971334069e-06, "loss": 1.317, "step": 399 }, { "epoch": 0.1103448275862069, "grad_norm": 4.94606351852417, "learning_rate": 4.410143329658214e-06, "loss": 1.1917, "step": 400 }, { "epoch": 0.11062068965517241, "grad_norm": 5.03632116317749, "learning_rate": 4.42116868798236e-06, "loss": 1.1727, "step": 401 }, { "epoch": 0.11089655172413793, "grad_norm": 5.295674800872803, "learning_rate": 4.432194046306505e-06, "loss": 1.2237, "step": 402 }, { "epoch": 0.11117241379310344, "grad_norm": 4.985062599182129, "learning_rate": 4.443219404630651e-06, "loss": 1.2485, "step": 403 }, { "epoch": 0.11144827586206897, "grad_norm": 4.766179084777832, "learning_rate": 4.454244762954797e-06, "loss": 1.1121, "step": 404 }, { "epoch": 0.11172413793103449, "grad_norm": 4.562309265136719, "learning_rate": 4.465270121278942e-06, "loss": 1.3395, "step": 405 }, { "epoch": 0.112, "grad_norm": 5.0303635597229, "learning_rate": 4.4762954796030875e-06, "loss": 1.2479, "step": 406 }, { "epoch": 0.11227586206896552, "grad_norm": 5.17652702331543, "learning_rate": 4.487320837927233e-06, "loss": 1.2247, "step": 407 }, { "epoch": 0.11255172413793103, "grad_norm": 4.693912029266357, "learning_rate": 4.498346196251378e-06, "loss": 1.304, "step": 408 }, { "epoch": 0.11282758620689655, "grad_norm": 5.24094295501709, "learning_rate": 4.509371554575524e-06, "loss": 1.3787, "step": 409 }, { "epoch": 0.11310344827586206, "grad_norm": 5.102416515350342, "learning_rate": 4.520396912899669e-06, "loss": 1.2052, "step": 410 }, { "epoch": 0.11337931034482758, "grad_norm": 5.039400577545166, "learning_rate": 4.531422271223815e-06, "loss": 1.2637, "step": 411 }, { "epoch": 0.11365517241379311, "grad_norm": 4.976189613342285, "learning_rate": 4.542447629547961e-06, "loss": 1.1826, "step": 412 }, { "epoch": 0.11393103448275863, "grad_norm": 5.571681022644043, "learning_rate": 4.553472987872106e-06, "loss": 1.3889, "step": 413 }, { "epoch": 0.11420689655172414, "grad_norm": 5.105461120605469, "learning_rate": 4.564498346196251e-06, "loss": 1.2584, "step": 414 }, { "epoch": 0.11448275862068966, "grad_norm": 4.901206016540527, "learning_rate": 4.575523704520397e-06, "loss": 1.3133, "step": 415 }, { "epoch": 0.11475862068965517, "grad_norm": 4.756040096282959, "learning_rate": 4.586549062844543e-06, "loss": 1.1926, "step": 416 }, { "epoch": 0.11503448275862069, "grad_norm": 4.287547588348389, "learning_rate": 4.597574421168688e-06, "loss": 1.0915, "step": 417 }, { "epoch": 0.1153103448275862, "grad_norm": 4.883122444152832, "learning_rate": 4.608599779492834e-06, "loss": 1.3632, "step": 418 }, { "epoch": 0.11558620689655172, "grad_norm": 4.753848075866699, "learning_rate": 4.619625137816979e-06, "loss": 1.1412, "step": 419 }, { "epoch": 0.11586206896551725, "grad_norm": 5.27369499206543, "learning_rate": 4.6306504961411246e-06, "loss": 1.1803, "step": 420 }, { "epoch": 0.11613793103448276, "grad_norm": 4.862664699554443, "learning_rate": 4.641675854465271e-06, "loss": 1.2862, "step": 421 }, { "epoch": 0.11641379310344828, "grad_norm": 4.815572261810303, "learning_rate": 4.652701212789416e-06, "loss": 1.3457, "step": 422 }, { "epoch": 0.1166896551724138, "grad_norm": 5.848683834075928, "learning_rate": 4.6637265711135616e-06, "loss": 1.4446, "step": 423 }, { "epoch": 0.11696551724137931, "grad_norm": 4.645429611206055, "learning_rate": 4.674751929437707e-06, "loss": 1.2383, "step": 424 }, { "epoch": 0.11724137931034483, "grad_norm": 5.023048400878906, "learning_rate": 4.685777287761852e-06, "loss": 1.252, "step": 425 }, { "epoch": 0.11751724137931034, "grad_norm": 4.514637470245361, "learning_rate": 4.6968026460859986e-06, "loss": 1.1638, "step": 426 }, { "epoch": 0.11779310344827586, "grad_norm": 4.303399085998535, "learning_rate": 4.707828004410144e-06, "loss": 1.1083, "step": 427 }, { "epoch": 0.11806896551724137, "grad_norm": 4.780041694641113, "learning_rate": 4.718853362734289e-06, "loss": 1.3642, "step": 428 }, { "epoch": 0.1183448275862069, "grad_norm": 5.015865325927734, "learning_rate": 4.729878721058435e-06, "loss": 1.2875, "step": 429 }, { "epoch": 0.11862068965517242, "grad_norm": 5.210927963256836, "learning_rate": 4.740904079382581e-06, "loss": 1.2395, "step": 430 }, { "epoch": 0.11889655172413793, "grad_norm": 5.154764175415039, "learning_rate": 4.751929437706726e-06, "loss": 1.262, "step": 431 }, { "epoch": 0.11917241379310345, "grad_norm": 5.233216285705566, "learning_rate": 4.762954796030872e-06, "loss": 1.3201, "step": 432 }, { "epoch": 0.11944827586206896, "grad_norm": 4.8820881843566895, "learning_rate": 4.773980154355017e-06, "loss": 1.1937, "step": 433 }, { "epoch": 0.11972413793103448, "grad_norm": 4.819260120391846, "learning_rate": 4.7850055126791625e-06, "loss": 1.1567, "step": 434 }, { "epoch": 0.12, "grad_norm": 4.36835241317749, "learning_rate": 4.796030871003308e-06, "loss": 1.1133, "step": 435 }, { "epoch": 0.12027586206896551, "grad_norm": 4.9776835441589355, "learning_rate": 4.807056229327453e-06, "loss": 1.295, "step": 436 }, { "epoch": 0.12055172413793104, "grad_norm": 4.739013195037842, "learning_rate": 4.8180815876515995e-06, "loss": 1.2107, "step": 437 }, { "epoch": 0.12082758620689656, "grad_norm": 4.95115327835083, "learning_rate": 4.829106945975745e-06, "loss": 1.4937, "step": 438 }, { "epoch": 0.12110344827586207, "grad_norm": 5.1152520179748535, "learning_rate": 4.84013230429989e-06, "loss": 1.2082, "step": 439 }, { "epoch": 0.12137931034482759, "grad_norm": 4.630075931549072, "learning_rate": 4.851157662624036e-06, "loss": 1.2922, "step": 440 }, { "epoch": 0.1216551724137931, "grad_norm": 4.530563831329346, "learning_rate": 4.862183020948181e-06, "loss": 1.2951, "step": 441 }, { "epoch": 0.12193103448275862, "grad_norm": 4.462390422821045, "learning_rate": 4.873208379272326e-06, "loss": 1.2936, "step": 442 }, { "epoch": 0.12220689655172413, "grad_norm": 4.948861122131348, "learning_rate": 4.884233737596472e-06, "loss": 1.2285, "step": 443 }, { "epoch": 0.12248275862068965, "grad_norm": 5.523327350616455, "learning_rate": 4.895259095920617e-06, "loss": 1.3793, "step": 444 }, { "epoch": 0.12275862068965518, "grad_norm": 4.957507610321045, "learning_rate": 4.906284454244763e-06, "loss": 1.2423, "step": 445 }, { "epoch": 0.1230344827586207, "grad_norm": 4.5526204109191895, "learning_rate": 4.917309812568909e-06, "loss": 1.1246, "step": 446 }, { "epoch": 0.12331034482758621, "grad_norm": 4.4550862312316895, "learning_rate": 4.928335170893054e-06, "loss": 1.1514, "step": 447 }, { "epoch": 0.12358620689655173, "grad_norm": 4.959444522857666, "learning_rate": 4.9393605292171995e-06, "loss": 1.196, "step": 448 }, { "epoch": 0.12386206896551724, "grad_norm": 4.8688645362854, "learning_rate": 4.950385887541346e-06, "loss": 1.1524, "step": 449 }, { "epoch": 0.12413793103448276, "grad_norm": 5.443241119384766, "learning_rate": 4.961411245865491e-06, "loss": 1.371, "step": 450 }, { "epoch": 0.12441379310344827, "grad_norm": 5.5229620933532715, "learning_rate": 4.9724366041896365e-06, "loss": 1.3681, "step": 451 }, { "epoch": 0.12468965517241379, "grad_norm": 5.195013999938965, "learning_rate": 4.983461962513782e-06, "loss": 1.2753, "step": 452 }, { "epoch": 0.1249655172413793, "grad_norm": 5.195214748382568, "learning_rate": 4.994487320837927e-06, "loss": 1.3545, "step": 453 }, { "epoch": 0.12524137931034482, "grad_norm": 4.731790065765381, "learning_rate": 5.0055126791620735e-06, "loss": 1.2498, "step": 454 }, { "epoch": 0.12551724137931033, "grad_norm": 4.637590408325195, "learning_rate": 5.016538037486218e-06, "loss": 1.3838, "step": 455 }, { "epoch": 0.12579310344827585, "grad_norm": 4.835601329803467, "learning_rate": 5.027563395810364e-06, "loss": 1.3716, "step": 456 }, { "epoch": 0.12606896551724137, "grad_norm": 4.948855876922607, "learning_rate": 5.0385887541345105e-06, "loss": 1.263, "step": 457 }, { "epoch": 0.1263448275862069, "grad_norm": 5.063255310058594, "learning_rate": 5.049614112458655e-06, "loss": 1.2434, "step": 458 }, { "epoch": 0.12662068965517242, "grad_norm": 4.6464619636535645, "learning_rate": 5.060639470782801e-06, "loss": 1.2261, "step": 459 }, { "epoch": 0.12689655172413794, "grad_norm": 5.1001482009887695, "learning_rate": 5.071664829106946e-06, "loss": 1.3607, "step": 460 }, { "epoch": 0.12717241379310346, "grad_norm": 4.3373236656188965, "learning_rate": 5.082690187431092e-06, "loss": 1.2054, "step": 461 }, { "epoch": 0.12744827586206897, "grad_norm": 4.803186416625977, "learning_rate": 5.0937155457552374e-06, "loss": 1.2912, "step": 462 }, { "epoch": 0.1277241379310345, "grad_norm": 4.351535320281982, "learning_rate": 5.104740904079383e-06, "loss": 1.215, "step": 463 }, { "epoch": 0.128, "grad_norm": 5.27277946472168, "learning_rate": 5.115766262403529e-06, "loss": 1.4811, "step": 464 }, { "epoch": 0.12827586206896552, "grad_norm": 4.8038153648376465, "learning_rate": 5.126791620727674e-06, "loss": 1.3922, "step": 465 }, { "epoch": 0.12855172413793103, "grad_norm": 4.8326616287231445, "learning_rate": 5.13781697905182e-06, "loss": 1.2753, "step": 466 }, { "epoch": 0.12882758620689655, "grad_norm": 4.52681827545166, "learning_rate": 5.148842337375965e-06, "loss": 1.2715, "step": 467 }, { "epoch": 0.12910344827586206, "grad_norm": 4.575504779815674, "learning_rate": 5.159867695700111e-06, "loss": 1.2463, "step": 468 }, { "epoch": 0.12937931034482758, "grad_norm": 4.833202838897705, "learning_rate": 5.170893054024256e-06, "loss": 1.1892, "step": 469 }, { "epoch": 0.1296551724137931, "grad_norm": 4.303750991821289, "learning_rate": 5.181918412348401e-06, "loss": 1.3115, "step": 470 }, { "epoch": 0.1299310344827586, "grad_norm": 4.400238037109375, "learning_rate": 5.192943770672548e-06, "loss": 1.1016, "step": 471 }, { "epoch": 0.13020689655172413, "grad_norm": 4.7128705978393555, "learning_rate": 5.203969128996693e-06, "loss": 1.1749, "step": 472 }, { "epoch": 0.13048275862068964, "grad_norm": 5.206439018249512, "learning_rate": 5.214994487320838e-06, "loss": 1.2318, "step": 473 }, { "epoch": 0.1307586206896552, "grad_norm": 4.874387741088867, "learning_rate": 5.226019845644984e-06, "loss": 1.2357, "step": 474 }, { "epoch": 0.1310344827586207, "grad_norm": 4.816194534301758, "learning_rate": 5.23704520396913e-06, "loss": 1.2769, "step": 475 }, { "epoch": 0.13131034482758622, "grad_norm": 5.541711330413818, "learning_rate": 5.2480705622932745e-06, "loss": 1.2872, "step": 476 }, { "epoch": 0.13158620689655173, "grad_norm": 4.75552225112915, "learning_rate": 5.259095920617421e-06, "loss": 1.2271, "step": 477 }, { "epoch": 0.13186206896551725, "grad_norm": 5.999216556549072, "learning_rate": 5.270121278941565e-06, "loss": 1.2203, "step": 478 }, { "epoch": 0.13213793103448276, "grad_norm": 4.71449613571167, "learning_rate": 5.2811466372657115e-06, "loss": 1.2111, "step": 479 }, { "epoch": 0.13241379310344828, "grad_norm": 5.149560451507568, "learning_rate": 5.292171995589858e-06, "loss": 1.5682, "step": 480 }, { "epoch": 0.1326896551724138, "grad_norm": 4.93344783782959, "learning_rate": 5.303197353914002e-06, "loss": 1.2977, "step": 481 }, { "epoch": 0.1329655172413793, "grad_norm": 4.688499927520752, "learning_rate": 5.3142227122381485e-06, "loss": 1.1477, "step": 482 }, { "epoch": 0.13324137931034483, "grad_norm": 5.023465633392334, "learning_rate": 5.325248070562293e-06, "loss": 1.2256, "step": 483 }, { "epoch": 0.13351724137931034, "grad_norm": 4.940195560455322, "learning_rate": 5.336273428886439e-06, "loss": 1.2391, "step": 484 }, { "epoch": 0.13379310344827586, "grad_norm": 4.769899845123291, "learning_rate": 5.3472987872105855e-06, "loss": 1.3546, "step": 485 }, { "epoch": 0.13406896551724137, "grad_norm": 4.778186798095703, "learning_rate": 5.35832414553473e-06, "loss": 1.2805, "step": 486 }, { "epoch": 0.1343448275862069, "grad_norm": 4.653156280517578, "learning_rate": 5.369349503858876e-06, "loss": 1.1503, "step": 487 }, { "epoch": 0.1346206896551724, "grad_norm": 4.578808784484863, "learning_rate": 5.380374862183021e-06, "loss": 1.1203, "step": 488 }, { "epoch": 0.13489655172413792, "grad_norm": 4.4978928565979, "learning_rate": 5.391400220507167e-06, "loss": 1.1322, "step": 489 }, { "epoch": 0.13517241379310344, "grad_norm": 5.429559707641602, "learning_rate": 5.402425578831313e-06, "loss": 1.2789, "step": 490 }, { "epoch": 0.13544827586206898, "grad_norm": 5.245391368865967, "learning_rate": 5.413450937155458e-06, "loss": 1.3628, "step": 491 }, { "epoch": 0.1357241379310345, "grad_norm": 5.165404319763184, "learning_rate": 5.424476295479604e-06, "loss": 1.1711, "step": 492 }, { "epoch": 0.136, "grad_norm": 4.731152057647705, "learning_rate": 5.4355016538037486e-06, "loss": 1.1463, "step": 493 }, { "epoch": 0.13627586206896553, "grad_norm": 5.054938793182373, "learning_rate": 5.446527012127895e-06, "loss": 1.2961, "step": 494 }, { "epoch": 0.13655172413793104, "grad_norm": 4.6369194984436035, "learning_rate": 5.45755237045204e-06, "loss": 1.3245, "step": 495 }, { "epoch": 0.13682758620689656, "grad_norm": 4.512877941131592, "learning_rate": 5.4685777287761856e-06, "loss": 1.2551, "step": 496 }, { "epoch": 0.13710344827586207, "grad_norm": 5.399054527282715, "learning_rate": 5.479603087100332e-06, "loss": 1.2841, "step": 497 }, { "epoch": 0.1373793103448276, "grad_norm": 4.711450099945068, "learning_rate": 5.490628445424476e-06, "loss": 1.2922, "step": 498 }, { "epoch": 0.1376551724137931, "grad_norm": 4.553782939910889, "learning_rate": 5.5016538037486226e-06, "loss": 1.2497, "step": 499 }, { "epoch": 0.13793103448275862, "grad_norm": 4.884493827819824, "learning_rate": 5.512679162072768e-06, "loss": 1.3095, "step": 500 }, { "epoch": 0.13793103448275862, "eval_loss": 1.306488037109375, "eval_runtime": 11.6045, "eval_samples_per_second": 34.469, "eval_steps_per_second": 4.309, "step": 500 }, { "epoch": 0.13820689655172413, "grad_norm": 4.638160705566406, "learning_rate": 5.523704520396913e-06, "loss": 1.2453, "step": 501 }, { "epoch": 0.13848275862068965, "grad_norm": 4.866035461425781, "learning_rate": 5.534729878721059e-06, "loss": 1.354, "step": 502 }, { "epoch": 0.13875862068965517, "grad_norm": 4.929218769073486, "learning_rate": 5.545755237045204e-06, "loss": 1.3174, "step": 503 }, { "epoch": 0.13903448275862068, "grad_norm": 4.698100566864014, "learning_rate": 5.5567805953693495e-06, "loss": 1.1545, "step": 504 }, { "epoch": 0.1393103448275862, "grad_norm": 4.780077934265137, "learning_rate": 5.567805953693496e-06, "loss": 1.3495, "step": 505 }, { "epoch": 0.1395862068965517, "grad_norm": 4.646601676940918, "learning_rate": 5.578831312017641e-06, "loss": 1.2962, "step": 506 }, { "epoch": 0.13986206896551723, "grad_norm": 4.70329475402832, "learning_rate": 5.5898566703417865e-06, "loss": 1.253, "step": 507 }, { "epoch": 0.14013793103448277, "grad_norm": 5.172776699066162, "learning_rate": 5.600882028665932e-06, "loss": 1.137, "step": 508 }, { "epoch": 0.1404137931034483, "grad_norm": 4.8419575691223145, "learning_rate": 5.611907386990077e-06, "loss": 1.3924, "step": 509 }, { "epoch": 0.1406896551724138, "grad_norm": 4.760987758636475, "learning_rate": 5.6229327453142235e-06, "loss": 1.4245, "step": 510 }, { "epoch": 0.14096551724137932, "grad_norm": 5.203549861907959, "learning_rate": 5.633958103638368e-06, "loss": 1.4459, "step": 511 }, { "epoch": 0.14124137931034483, "grad_norm": 4.965125560760498, "learning_rate": 5.644983461962514e-06, "loss": 1.4186, "step": 512 }, { "epoch": 0.14151724137931035, "grad_norm": 4.322177886962891, "learning_rate": 5.6560088202866605e-06, "loss": 1.2672, "step": 513 }, { "epoch": 0.14179310344827586, "grad_norm": 5.945695400238037, "learning_rate": 5.667034178610805e-06, "loss": 1.3694, "step": 514 }, { "epoch": 0.14206896551724138, "grad_norm": 4.725958824157715, "learning_rate": 5.678059536934951e-06, "loss": 1.1937, "step": 515 }, { "epoch": 0.1423448275862069, "grad_norm": 4.7618727684021, "learning_rate": 5.689084895259096e-06, "loss": 1.1924, "step": 516 }, { "epoch": 0.1426206896551724, "grad_norm": 4.780855655670166, "learning_rate": 5.700110253583242e-06, "loss": 1.1776, "step": 517 }, { "epoch": 0.14289655172413793, "grad_norm": 4.921565055847168, "learning_rate": 5.711135611907388e-06, "loss": 1.3397, "step": 518 }, { "epoch": 0.14317241379310344, "grad_norm": 5.675736427307129, "learning_rate": 5.722160970231533e-06, "loss": 1.147, "step": 519 }, { "epoch": 0.14344827586206896, "grad_norm": 4.749997138977051, "learning_rate": 5.733186328555679e-06, "loss": 1.3315, "step": 520 }, { "epoch": 0.14372413793103447, "grad_norm": 4.278105735778809, "learning_rate": 5.7442116868798235e-06, "loss": 1.2252, "step": 521 }, { "epoch": 0.144, "grad_norm": 4.704592227935791, "learning_rate": 5.75523704520397e-06, "loss": 1.3098, "step": 522 }, { "epoch": 0.1442758620689655, "grad_norm": 5.139385223388672, "learning_rate": 5.766262403528116e-06, "loss": 1.3197, "step": 523 }, { "epoch": 0.14455172413793105, "grad_norm": 4.707585334777832, "learning_rate": 5.7772877618522605e-06, "loss": 1.3309, "step": 524 }, { "epoch": 0.14482758620689656, "grad_norm": 4.73273229598999, "learning_rate": 5.788313120176407e-06, "loss": 1.1081, "step": 525 }, { "epoch": 0.14510344827586208, "grad_norm": 4.9469895362854, "learning_rate": 5.799338478500551e-06, "loss": 1.2944, "step": 526 }, { "epoch": 0.1453793103448276, "grad_norm": 4.401021957397461, "learning_rate": 5.8103638368246975e-06, "loss": 1.0735, "step": 527 }, { "epoch": 0.1456551724137931, "grad_norm": 4.7508931159973145, "learning_rate": 5.821389195148843e-06, "loss": 1.3302, "step": 528 }, { "epoch": 0.14593103448275863, "grad_norm": 4.501974105834961, "learning_rate": 5.832414553472988e-06, "loss": 1.2811, "step": 529 }, { "epoch": 0.14620689655172414, "grad_norm": 4.933117866516113, "learning_rate": 5.8434399117971345e-06, "loss": 1.2257, "step": 530 }, { "epoch": 0.14648275862068966, "grad_norm": 4.587188243865967, "learning_rate": 5.854465270121279e-06, "loss": 1.2132, "step": 531 }, { "epoch": 0.14675862068965517, "grad_norm": 4.617749214172363, "learning_rate": 5.865490628445425e-06, "loss": 1.4299, "step": 532 }, { "epoch": 0.1470344827586207, "grad_norm": 4.590085029602051, "learning_rate": 5.876515986769571e-06, "loss": 1.1696, "step": 533 }, { "epoch": 0.1473103448275862, "grad_norm": 4.458635330200195, "learning_rate": 5.887541345093716e-06, "loss": 1.2767, "step": 534 }, { "epoch": 0.14758620689655172, "grad_norm": 4.737541198730469, "learning_rate": 5.8985667034178614e-06, "loss": 1.3044, "step": 535 }, { "epoch": 0.14786206896551723, "grad_norm": 5.263787269592285, "learning_rate": 5.909592061742007e-06, "loss": 1.3254, "step": 536 }, { "epoch": 0.14813793103448275, "grad_norm": 5.034799575805664, "learning_rate": 5.920617420066152e-06, "loss": 1.2706, "step": 537 }, { "epoch": 0.14841379310344827, "grad_norm": 5.241380214691162, "learning_rate": 5.9316427783902984e-06, "loss": 1.3718, "step": 538 }, { "epoch": 0.14868965517241378, "grad_norm": 4.760156631469727, "learning_rate": 5.942668136714444e-06, "loss": 1.274, "step": 539 }, { "epoch": 0.1489655172413793, "grad_norm": 5.01708984375, "learning_rate": 5.953693495038589e-06, "loss": 1.4341, "step": 540 }, { "epoch": 0.14924137931034484, "grad_norm": 4.978157043457031, "learning_rate": 5.964718853362735e-06, "loss": 1.233, "step": 541 }, { "epoch": 0.14951724137931036, "grad_norm": 4.481117248535156, "learning_rate": 5.97574421168688e-06, "loss": 1.3059, "step": 542 }, { "epoch": 0.14979310344827587, "grad_norm": 4.228063583374023, "learning_rate": 5.986769570011026e-06, "loss": 1.3342, "step": 543 }, { "epoch": 0.1500689655172414, "grad_norm": 4.932047367095947, "learning_rate": 5.997794928335171e-06, "loss": 1.398, "step": 544 }, { "epoch": 0.1503448275862069, "grad_norm": 4.8426361083984375, "learning_rate": 6.008820286659317e-06, "loss": 1.2527, "step": 545 }, { "epoch": 0.15062068965517242, "grad_norm": 5.720081806182861, "learning_rate": 6.0198456449834615e-06, "loss": 1.4574, "step": 546 }, { "epoch": 0.15089655172413793, "grad_norm": 4.709903717041016, "learning_rate": 6.030871003307608e-06, "loss": 1.3341, "step": 547 }, { "epoch": 0.15117241379310345, "grad_norm": 4.691115379333496, "learning_rate": 6.041896361631754e-06, "loss": 1.2437, "step": 548 }, { "epoch": 0.15144827586206897, "grad_norm": 4.612683296203613, "learning_rate": 6.0529217199558985e-06, "loss": 1.3793, "step": 549 }, { "epoch": 0.15172413793103448, "grad_norm": 4.5940656661987305, "learning_rate": 6.063947078280045e-06, "loss": 1.2282, "step": 550 }, { "epoch": 0.152, "grad_norm": 4.558703899383545, "learning_rate": 6.074972436604191e-06, "loss": 1.3409, "step": 551 }, { "epoch": 0.1522758620689655, "grad_norm": 4.620532512664795, "learning_rate": 6.0859977949283355e-06, "loss": 1.241, "step": 552 }, { "epoch": 0.15255172413793103, "grad_norm": 4.250223636627197, "learning_rate": 6.097023153252482e-06, "loss": 1.1016, "step": 553 }, { "epoch": 0.15282758620689654, "grad_norm": 4.6867570877075195, "learning_rate": 6.108048511576626e-06, "loss": 1.1766, "step": 554 }, { "epoch": 0.15310344827586206, "grad_norm": 4.988593578338623, "learning_rate": 6.1190738699007725e-06, "loss": 1.3648, "step": 555 }, { "epoch": 0.15337931034482757, "grad_norm": 4.895255088806152, "learning_rate": 6.130099228224919e-06, "loss": 1.3141, "step": 556 }, { "epoch": 0.1536551724137931, "grad_norm": 4.866540908813477, "learning_rate": 6.141124586549063e-06, "loss": 1.1486, "step": 557 }, { "epoch": 0.15393103448275863, "grad_norm": 4.694727897644043, "learning_rate": 6.1521499448732095e-06, "loss": 1.2881, "step": 558 }, { "epoch": 0.15420689655172415, "grad_norm": 4.402827262878418, "learning_rate": 6.163175303197354e-06, "loss": 1.1648, "step": 559 }, { "epoch": 0.15448275862068966, "grad_norm": 4.135559558868408, "learning_rate": 6.1742006615215e-06, "loss": 0.9983, "step": 560 }, { "epoch": 0.15475862068965518, "grad_norm": 4.821647644042969, "learning_rate": 6.185226019845646e-06, "loss": 1.3348, "step": 561 }, { "epoch": 0.1550344827586207, "grad_norm": 4.710869789123535, "learning_rate": 6.196251378169791e-06, "loss": 1.3593, "step": 562 }, { "epoch": 0.1553103448275862, "grad_norm": 4.825790882110596, "learning_rate": 6.207276736493936e-06, "loss": 1.2511, "step": 563 }, { "epoch": 0.15558620689655173, "grad_norm": 4.726259231567383, "learning_rate": 6.218302094818082e-06, "loss": 1.1246, "step": 564 }, { "epoch": 0.15586206896551724, "grad_norm": 4.946377277374268, "learning_rate": 6.229327453142228e-06, "loss": 1.2783, "step": 565 }, { "epoch": 0.15613793103448276, "grad_norm": 5.13456392288208, "learning_rate": 6.240352811466373e-06, "loss": 1.2546, "step": 566 }, { "epoch": 0.15641379310344827, "grad_norm": 4.670046806335449, "learning_rate": 6.251378169790519e-06, "loss": 1.2407, "step": 567 }, { "epoch": 0.1566896551724138, "grad_norm": 5.676980972290039, "learning_rate": 6.262403528114664e-06, "loss": 1.4335, "step": 568 }, { "epoch": 0.1569655172413793, "grad_norm": 4.951840877532959, "learning_rate": 6.2734288864388096e-06, "loss": 1.3196, "step": 569 }, { "epoch": 0.15724137931034482, "grad_norm": 4.855698585510254, "learning_rate": 6.284454244762955e-06, "loss": 1.3577, "step": 570 }, { "epoch": 0.15751724137931034, "grad_norm": 4.821094036102295, "learning_rate": 6.295479603087101e-06, "loss": 1.2517, "step": 571 }, { "epoch": 0.15779310344827585, "grad_norm": 4.755015850067139, "learning_rate": 6.306504961411246e-06, "loss": 1.2237, "step": 572 }, { "epoch": 0.15806896551724137, "grad_norm": 4.3604865074157715, "learning_rate": 6.317530319735392e-06, "loss": 1.2729, "step": 573 }, { "epoch": 0.1583448275862069, "grad_norm": 4.7963995933532715, "learning_rate": 6.328555678059537e-06, "loss": 1.3184, "step": 574 }, { "epoch": 0.15862068965517243, "grad_norm": 4.978405475616455, "learning_rate": 6.339581036383683e-06, "loss": 1.3914, "step": 575 }, { "epoch": 0.15889655172413794, "grad_norm": 4.6449384689331055, "learning_rate": 6.350606394707829e-06, "loss": 1.288, "step": 576 }, { "epoch": 0.15917241379310346, "grad_norm": 4.535279273986816, "learning_rate": 6.3616317530319735e-06, "loss": 1.2329, "step": 577 }, { "epoch": 0.15944827586206897, "grad_norm": 5.131060600280762, "learning_rate": 6.37265711135612e-06, "loss": 1.3551, "step": 578 }, { "epoch": 0.1597241379310345, "grad_norm": 4.424389839172363, "learning_rate": 6.383682469680264e-06, "loss": 1.2326, "step": 579 }, { "epoch": 0.16, "grad_norm": 4.592067718505859, "learning_rate": 6.3947078280044105e-06, "loss": 1.2814, "step": 580 }, { "epoch": 0.16027586206896552, "grad_norm": 4.625774383544922, "learning_rate": 6.405733186328557e-06, "loss": 1.4929, "step": 581 }, { "epoch": 0.16055172413793103, "grad_norm": 4.558657169342041, "learning_rate": 6.416758544652701e-06, "loss": 1.2475, "step": 582 }, { "epoch": 0.16082758620689655, "grad_norm": 4.66735315322876, "learning_rate": 6.4277839029768475e-06, "loss": 1.255, "step": 583 }, { "epoch": 0.16110344827586207, "grad_norm": 4.916066646575928, "learning_rate": 6.438809261300992e-06, "loss": 1.2616, "step": 584 }, { "epoch": 0.16137931034482758, "grad_norm": 4.776900768280029, "learning_rate": 6.449834619625138e-06, "loss": 1.3085, "step": 585 }, { "epoch": 0.1616551724137931, "grad_norm": 5.221232891082764, "learning_rate": 6.4608599779492845e-06, "loss": 1.3384, "step": 586 }, { "epoch": 0.1619310344827586, "grad_norm": 5.2406439781188965, "learning_rate": 6.471885336273429e-06, "loss": 1.2534, "step": 587 }, { "epoch": 0.16220689655172413, "grad_norm": 4.8229193687438965, "learning_rate": 6.482910694597575e-06, "loss": 1.3187, "step": 588 }, { "epoch": 0.16248275862068964, "grad_norm": 4.803974628448486, "learning_rate": 6.493936052921721e-06, "loss": 1.3172, "step": 589 }, { "epoch": 0.16275862068965516, "grad_norm": 4.6014628410339355, "learning_rate": 6.504961411245866e-06, "loss": 1.2451, "step": 590 }, { "epoch": 0.1630344827586207, "grad_norm": 4.225053310394287, "learning_rate": 6.515986769570012e-06, "loss": 1.1866, "step": 591 }, { "epoch": 0.16331034482758622, "grad_norm": 4.750767707824707, "learning_rate": 6.527012127894157e-06, "loss": 1.406, "step": 592 }, { "epoch": 0.16358620689655173, "grad_norm": 4.8835768699646, "learning_rate": 6.538037486218303e-06, "loss": 1.2809, "step": 593 }, { "epoch": 0.16386206896551725, "grad_norm": 4.372856616973877, "learning_rate": 6.549062844542448e-06, "loss": 1.2809, "step": 594 }, { "epoch": 0.16413793103448276, "grad_norm": 4.929779529571533, "learning_rate": 6.560088202866594e-06, "loss": 1.1567, "step": 595 }, { "epoch": 0.16441379310344828, "grad_norm": 4.653621673583984, "learning_rate": 6.571113561190739e-06, "loss": 1.3435, "step": 596 }, { "epoch": 0.1646896551724138, "grad_norm": 4.216700553894043, "learning_rate": 6.5821389195148845e-06, "loss": 1.1458, "step": 597 }, { "epoch": 0.1649655172413793, "grad_norm": 5.331818103790283, "learning_rate": 6.593164277839031e-06, "loss": 1.4012, "step": 598 }, { "epoch": 0.16524137931034483, "grad_norm": 5.060673713684082, "learning_rate": 6.604189636163176e-06, "loss": 1.4125, "step": 599 }, { "epoch": 0.16551724137931034, "grad_norm": 4.566354274749756, "learning_rate": 6.6152149944873215e-06, "loss": 1.1475, "step": 600 }, { "epoch": 0.16579310344827586, "grad_norm": 4.598692893981934, "learning_rate": 6.626240352811467e-06, "loss": 1.297, "step": 601 }, { "epoch": 0.16606896551724137, "grad_norm": 5.012694835662842, "learning_rate": 6.637265711135612e-06, "loss": 1.4706, "step": 602 }, { "epoch": 0.1663448275862069, "grad_norm": 4.8546223640441895, "learning_rate": 6.648291069459758e-06, "loss": 1.2883, "step": 603 }, { "epoch": 0.1666206896551724, "grad_norm": 4.481150150299072, "learning_rate": 6.659316427783904e-06, "loss": 1.2402, "step": 604 }, { "epoch": 0.16689655172413792, "grad_norm": 4.488452911376953, "learning_rate": 6.6703417861080484e-06, "loss": 1.2613, "step": 605 }, { "epoch": 0.16717241379310344, "grad_norm": 4.544008255004883, "learning_rate": 6.681367144432195e-06, "loss": 1.2001, "step": 606 }, { "epoch": 0.16744827586206898, "grad_norm": 4.430274963378906, "learning_rate": 6.69239250275634e-06, "loss": 1.2062, "step": 607 }, { "epoch": 0.1677241379310345, "grad_norm": 4.4892096519470215, "learning_rate": 6.7034178610804854e-06, "loss": 1.2859, "step": 608 }, { "epoch": 0.168, "grad_norm": 4.3937602043151855, "learning_rate": 6.714443219404632e-06, "loss": 1.1078, "step": 609 }, { "epoch": 0.16827586206896553, "grad_norm": 5.13479471206665, "learning_rate": 6.725468577728776e-06, "loss": 1.406, "step": 610 }, { "epoch": 0.16855172413793104, "grad_norm": 4.835383415222168, "learning_rate": 6.7364939360529224e-06, "loss": 1.3492, "step": 611 }, { "epoch": 0.16882758620689656, "grad_norm": 5.226990699768066, "learning_rate": 6.747519294377067e-06, "loss": 1.4523, "step": 612 }, { "epoch": 0.16910344827586207, "grad_norm": 4.52901554107666, "learning_rate": 6.758544652701213e-06, "loss": 1.304, "step": 613 }, { "epoch": 0.1693793103448276, "grad_norm": 4.747707366943359, "learning_rate": 6.7695700110253594e-06, "loss": 1.2819, "step": 614 }, { "epoch": 0.1696551724137931, "grad_norm": 4.421627044677734, "learning_rate": 6.780595369349504e-06, "loss": 1.1244, "step": 615 }, { "epoch": 0.16993103448275862, "grad_norm": 4.759936332702637, "learning_rate": 6.79162072767365e-06, "loss": 1.4388, "step": 616 }, { "epoch": 0.17020689655172414, "grad_norm": 5.179024696350098, "learning_rate": 6.802646085997795e-06, "loss": 1.134, "step": 617 }, { "epoch": 0.17048275862068965, "grad_norm": 5.0831098556518555, "learning_rate": 6.813671444321941e-06, "loss": 1.3737, "step": 618 }, { "epoch": 0.17075862068965517, "grad_norm": 4.905729293823242, "learning_rate": 6.824696802646087e-06, "loss": 1.3623, "step": 619 }, { "epoch": 0.17103448275862068, "grad_norm": 4.560866832733154, "learning_rate": 6.835722160970232e-06, "loss": 1.3893, "step": 620 }, { "epoch": 0.1713103448275862, "grad_norm": 4.178302764892578, "learning_rate": 6.846747519294378e-06, "loss": 1.2161, "step": 621 }, { "epoch": 0.1715862068965517, "grad_norm": 4.909627437591553, "learning_rate": 6.8577728776185225e-06, "loss": 1.3868, "step": 622 }, { "epoch": 0.17186206896551723, "grad_norm": 4.9289679527282715, "learning_rate": 6.868798235942669e-06, "loss": 1.1759, "step": 623 }, { "epoch": 0.17213793103448277, "grad_norm": 4.917293548583984, "learning_rate": 6.879823594266815e-06, "loss": 1.2716, "step": 624 }, { "epoch": 0.1724137931034483, "grad_norm": 4.39429235458374, "learning_rate": 6.8908489525909595e-06, "loss": 1.3736, "step": 625 }, { "epoch": 0.1726896551724138, "grad_norm": 4.697971343994141, "learning_rate": 6.901874310915106e-06, "loss": 1.2352, "step": 626 }, { "epoch": 0.17296551724137932, "grad_norm": 4.641385078430176, "learning_rate": 6.912899669239251e-06, "loss": 1.1785, "step": 627 }, { "epoch": 0.17324137931034483, "grad_norm": 4.608788967132568, "learning_rate": 6.9239250275633965e-06, "loss": 1.3651, "step": 628 }, { "epoch": 0.17351724137931035, "grad_norm": 4.632394790649414, "learning_rate": 6.934950385887542e-06, "loss": 1.1936, "step": 629 }, { "epoch": 0.17379310344827587, "grad_norm": 5.182921409606934, "learning_rate": 6.945975744211687e-06, "loss": 1.2299, "step": 630 }, { "epoch": 0.17406896551724138, "grad_norm": 4.783362865447998, "learning_rate": 6.957001102535833e-06, "loss": 1.3057, "step": 631 }, { "epoch": 0.1743448275862069, "grad_norm": 4.850286483764648, "learning_rate": 6.968026460859979e-06, "loss": 1.2302, "step": 632 }, { "epoch": 0.1746206896551724, "grad_norm": 4.739316940307617, "learning_rate": 6.979051819184124e-06, "loss": 1.3577, "step": 633 }, { "epoch": 0.17489655172413793, "grad_norm": 4.542716026306152, "learning_rate": 6.99007717750827e-06, "loss": 1.2919, "step": 634 }, { "epoch": 0.17517241379310344, "grad_norm": 4.532547950744629, "learning_rate": 7.001102535832415e-06, "loss": 1.3926, "step": 635 }, { "epoch": 0.17544827586206896, "grad_norm": 4.667785167694092, "learning_rate": 7.01212789415656e-06, "loss": 1.363, "step": 636 }, { "epoch": 0.17572413793103447, "grad_norm": 5.059520244598389, "learning_rate": 7.023153252480707e-06, "loss": 1.3421, "step": 637 }, { "epoch": 0.176, "grad_norm": 4.708293914794922, "learning_rate": 7.034178610804851e-06, "loss": 1.2432, "step": 638 }, { "epoch": 0.1762758620689655, "grad_norm": 5.080163478851318, "learning_rate": 7.045203969128997e-06, "loss": 1.305, "step": 639 }, { "epoch": 0.17655172413793102, "grad_norm": 4.822748184204102, "learning_rate": 7.056229327453143e-06, "loss": 1.203, "step": 640 }, { "epoch": 0.17682758620689656, "grad_norm": 4.493291854858398, "learning_rate": 7.067254685777288e-06, "loss": 1.2707, "step": 641 }, { "epoch": 0.17710344827586208, "grad_norm": 5.012834072113037, "learning_rate": 7.078280044101434e-06, "loss": 1.3125, "step": 642 }, { "epoch": 0.1773793103448276, "grad_norm": 4.61276388168335, "learning_rate": 7.089305402425579e-06, "loss": 1.3707, "step": 643 }, { "epoch": 0.1776551724137931, "grad_norm": 4.6235246658325195, "learning_rate": 7.100330760749725e-06, "loss": 1.3504, "step": 644 }, { "epoch": 0.17793103448275863, "grad_norm": 4.656466007232666, "learning_rate": 7.11135611907387e-06, "loss": 1.3201, "step": 645 }, { "epoch": 0.17820689655172414, "grad_norm": 4.733910083770752, "learning_rate": 7.122381477398016e-06, "loss": 1.5171, "step": 646 }, { "epoch": 0.17848275862068966, "grad_norm": 4.854198932647705, "learning_rate": 7.133406835722162e-06, "loss": 1.2858, "step": 647 }, { "epoch": 0.17875862068965517, "grad_norm": 5.997928142547607, "learning_rate": 7.144432194046307e-06, "loss": 1.3305, "step": 648 }, { "epoch": 0.1790344827586207, "grad_norm": 4.94354772567749, "learning_rate": 7.155457552370453e-06, "loss": 1.2657, "step": 649 }, { "epoch": 0.1793103448275862, "grad_norm": 4.947078227996826, "learning_rate": 7.1664829106945975e-06, "loss": 1.252, "step": 650 }, { "epoch": 0.17958620689655172, "grad_norm": 4.412075042724609, "learning_rate": 7.177508269018744e-06, "loss": 1.3211, "step": 651 }, { "epoch": 0.17986206896551724, "grad_norm": 4.447366237640381, "learning_rate": 7.18853362734289e-06, "loss": 1.3048, "step": 652 }, { "epoch": 0.18013793103448275, "grad_norm": 4.652190685272217, "learning_rate": 7.1995589856670345e-06, "loss": 1.3022, "step": 653 }, { "epoch": 0.18041379310344827, "grad_norm": 4.4208245277404785, "learning_rate": 7.210584343991181e-06, "loss": 1.3557, "step": 654 }, { "epoch": 0.18068965517241378, "grad_norm": 5.119078159332275, "learning_rate": 7.221609702315325e-06, "loss": 1.3689, "step": 655 }, { "epoch": 0.1809655172413793, "grad_norm": 5.311029434204102, "learning_rate": 7.2326350606394715e-06, "loss": 1.356, "step": 656 }, { "epoch": 0.18124137931034484, "grad_norm": 5.066951274871826, "learning_rate": 7.243660418963617e-06, "loss": 1.3277, "step": 657 }, { "epoch": 0.18151724137931036, "grad_norm": 4.834450721740723, "learning_rate": 7.254685777287762e-06, "loss": 1.2816, "step": 658 }, { "epoch": 0.18179310344827587, "grad_norm": 5.273137092590332, "learning_rate": 7.2657111356119085e-06, "loss": 1.3077, "step": 659 }, { "epoch": 0.1820689655172414, "grad_norm": 4.49812126159668, "learning_rate": 7.276736493936053e-06, "loss": 1.3019, "step": 660 }, { "epoch": 0.1823448275862069, "grad_norm": 4.635072708129883, "learning_rate": 7.287761852260199e-06, "loss": 1.2351, "step": 661 }, { "epoch": 0.18262068965517242, "grad_norm": 4.614199638366699, "learning_rate": 7.298787210584345e-06, "loss": 1.1775, "step": 662 }, { "epoch": 0.18289655172413793, "grad_norm": 4.733823299407959, "learning_rate": 7.30981256890849e-06, "loss": 1.1458, "step": 663 }, { "epoch": 0.18317241379310345, "grad_norm": 4.579779148101807, "learning_rate": 7.320837927232635e-06, "loss": 1.4065, "step": 664 }, { "epoch": 0.18344827586206897, "grad_norm": 4.667539596557617, "learning_rate": 7.331863285556782e-06, "loss": 1.3204, "step": 665 }, { "epoch": 0.18372413793103448, "grad_norm": 4.430316925048828, "learning_rate": 7.342888643880927e-06, "loss": 1.2374, "step": 666 }, { "epoch": 0.184, "grad_norm": 4.785229206085205, "learning_rate": 7.353914002205072e-06, "loss": 1.2397, "step": 667 }, { "epoch": 0.1842758620689655, "grad_norm": 4.568402290344238, "learning_rate": 7.364939360529218e-06, "loss": 1.2613, "step": 668 }, { "epoch": 0.18455172413793103, "grad_norm": 4.627558708190918, "learning_rate": 7.375964718853363e-06, "loss": 1.241, "step": 669 }, { "epoch": 0.18482758620689654, "grad_norm": 4.871840953826904, "learning_rate": 7.386990077177509e-06, "loss": 1.4475, "step": 670 }, { "epoch": 0.18510344827586206, "grad_norm": 4.598531246185303, "learning_rate": 7.398015435501654e-06, "loss": 1.332, "step": 671 }, { "epoch": 0.18537931034482757, "grad_norm": 4.855278968811035, "learning_rate": 7.4090407938258e-06, "loss": 1.2676, "step": 672 }, { "epoch": 0.1856551724137931, "grad_norm": 4.579756259918213, "learning_rate": 7.420066152149945e-06, "loss": 1.2424, "step": 673 }, { "epoch": 0.18593103448275863, "grad_norm": 4.7249016761779785, "learning_rate": 7.431091510474091e-06, "loss": 1.1867, "step": 674 }, { "epoch": 0.18620689655172415, "grad_norm": 5.0510478019714355, "learning_rate": 7.442116868798237e-06, "loss": 1.4649, "step": 675 }, { "epoch": 0.18648275862068966, "grad_norm": 4.844771862030029, "learning_rate": 7.453142227122382e-06, "loss": 1.1417, "step": 676 }, { "epoch": 0.18675862068965518, "grad_norm": 4.553152561187744, "learning_rate": 7.464167585446528e-06, "loss": 1.26, "step": 677 }, { "epoch": 0.1870344827586207, "grad_norm": 4.725376129150391, "learning_rate": 7.4751929437706724e-06, "loss": 1.3957, "step": 678 }, { "epoch": 0.1873103448275862, "grad_norm": 4.57722806930542, "learning_rate": 7.486218302094819e-06, "loss": 1.2566, "step": 679 }, { "epoch": 0.18758620689655173, "grad_norm": 4.751240253448486, "learning_rate": 7.497243660418965e-06, "loss": 1.2591, "step": 680 }, { "epoch": 0.18786206896551724, "grad_norm": 4.809601783752441, "learning_rate": 7.5082690187431094e-06, "loss": 1.3333, "step": 681 }, { "epoch": 0.18813793103448276, "grad_norm": 5.020957946777344, "learning_rate": 7.519294377067256e-06, "loss": 1.3377, "step": 682 }, { "epoch": 0.18841379310344827, "grad_norm": 4.730230808258057, "learning_rate": 7.5303197353914e-06, "loss": 1.3415, "step": 683 }, { "epoch": 0.1886896551724138, "grad_norm": 5.604578495025635, "learning_rate": 7.5413450937155464e-06, "loss": 1.3754, "step": 684 }, { "epoch": 0.1889655172413793, "grad_norm": 4.842534065246582, "learning_rate": 7.552370452039693e-06, "loss": 1.3128, "step": 685 }, { "epoch": 0.18924137931034482, "grad_norm": 4.6406755447387695, "learning_rate": 7.563395810363837e-06, "loss": 1.38, "step": 686 }, { "epoch": 0.18951724137931034, "grad_norm": 4.471737384796143, "learning_rate": 7.5744211686879834e-06, "loss": 1.3474, "step": 687 }, { "epoch": 0.18979310344827585, "grad_norm": 4.182044982910156, "learning_rate": 7.585446527012128e-06, "loss": 1.1249, "step": 688 }, { "epoch": 0.19006896551724137, "grad_norm": 4.321019649505615, "learning_rate": 7.596471885336274e-06, "loss": 1.1742, "step": 689 }, { "epoch": 0.19034482758620688, "grad_norm": 5.080165386199951, "learning_rate": 7.60749724366042e-06, "loss": 1.3922, "step": 690 }, { "epoch": 0.19062068965517243, "grad_norm": 4.375417232513428, "learning_rate": 7.618522601984565e-06, "loss": 1.2013, "step": 691 }, { "epoch": 0.19089655172413794, "grad_norm": 4.34298038482666, "learning_rate": 7.62954796030871e-06, "loss": 1.233, "step": 692 }, { "epoch": 0.19117241379310346, "grad_norm": 4.359104633331299, "learning_rate": 7.640573318632855e-06, "loss": 1.2185, "step": 693 }, { "epoch": 0.19144827586206897, "grad_norm": 4.940853118896484, "learning_rate": 7.651598676957001e-06, "loss": 1.2972, "step": 694 }, { "epoch": 0.1917241379310345, "grad_norm": 4.377235412597656, "learning_rate": 7.662624035281147e-06, "loss": 1.2622, "step": 695 }, { "epoch": 0.192, "grad_norm": 4.501707553863525, "learning_rate": 7.673649393605292e-06, "loss": 1.2994, "step": 696 }, { "epoch": 0.19227586206896552, "grad_norm": 4.276073932647705, "learning_rate": 7.684674751929438e-06, "loss": 1.2539, "step": 697 }, { "epoch": 0.19255172413793104, "grad_norm": 4.8355937004089355, "learning_rate": 7.695700110253583e-06, "loss": 1.2798, "step": 698 }, { "epoch": 0.19282758620689655, "grad_norm": 4.546530246734619, "learning_rate": 7.706725468577729e-06, "loss": 1.1996, "step": 699 }, { "epoch": 0.19310344827586207, "grad_norm": 4.043571472167969, "learning_rate": 7.717750826901875e-06, "loss": 1.1958, "step": 700 }, { "epoch": 0.19337931034482758, "grad_norm": 4.8824005126953125, "learning_rate": 7.72877618522602e-06, "loss": 1.184, "step": 701 }, { "epoch": 0.1936551724137931, "grad_norm": 5.072420120239258, "learning_rate": 7.739801543550166e-06, "loss": 1.2884, "step": 702 }, { "epoch": 0.1939310344827586, "grad_norm": 4.666810512542725, "learning_rate": 7.750826901874312e-06, "loss": 1.2294, "step": 703 }, { "epoch": 0.19420689655172413, "grad_norm": 4.559747695922852, "learning_rate": 7.761852260198457e-06, "loss": 1.2998, "step": 704 }, { "epoch": 0.19448275862068964, "grad_norm": 4.889866352081299, "learning_rate": 7.772877618522603e-06, "loss": 1.2662, "step": 705 }, { "epoch": 0.19475862068965516, "grad_norm": 4.760055065155029, "learning_rate": 7.783902976846747e-06, "loss": 1.4619, "step": 706 }, { "epoch": 0.1950344827586207, "grad_norm": 4.664912700653076, "learning_rate": 7.794928335170894e-06, "loss": 1.3345, "step": 707 }, { "epoch": 0.19531034482758622, "grad_norm": 4.752709865570068, "learning_rate": 7.80595369349504e-06, "loss": 1.3984, "step": 708 }, { "epoch": 0.19558620689655173, "grad_norm": 4.473564147949219, "learning_rate": 7.816979051819184e-06, "loss": 1.3557, "step": 709 }, { "epoch": 0.19586206896551725, "grad_norm": 4.252484321594238, "learning_rate": 7.82800441014333e-06, "loss": 1.3038, "step": 710 }, { "epoch": 0.19613793103448277, "grad_norm": 4.335697650909424, "learning_rate": 7.839029768467475e-06, "loss": 1.2219, "step": 711 }, { "epoch": 0.19641379310344828, "grad_norm": 4.684236526489258, "learning_rate": 7.850055126791621e-06, "loss": 1.2281, "step": 712 }, { "epoch": 0.1966896551724138, "grad_norm": 4.179485321044922, "learning_rate": 7.861080485115768e-06, "loss": 1.1212, "step": 713 }, { "epoch": 0.1969655172413793, "grad_norm": 4.955034255981445, "learning_rate": 7.872105843439912e-06, "loss": 1.32, "step": 714 }, { "epoch": 0.19724137931034483, "grad_norm": 4.16707181930542, "learning_rate": 7.883131201764058e-06, "loss": 1.2025, "step": 715 }, { "epoch": 0.19751724137931034, "grad_norm": 4.472283363342285, "learning_rate": 7.894156560088203e-06, "loss": 1.3702, "step": 716 }, { "epoch": 0.19779310344827586, "grad_norm": 4.274471282958984, "learning_rate": 7.90518191841235e-06, "loss": 1.1499, "step": 717 }, { "epoch": 0.19806896551724137, "grad_norm": 4.269021034240723, "learning_rate": 7.916207276736495e-06, "loss": 1.2678, "step": 718 }, { "epoch": 0.1983448275862069, "grad_norm": 4.6710333824157715, "learning_rate": 7.92723263506064e-06, "loss": 1.4011, "step": 719 }, { "epoch": 0.1986206896551724, "grad_norm": 4.604608058929443, "learning_rate": 7.938257993384786e-06, "loss": 1.2989, "step": 720 }, { "epoch": 0.19889655172413792, "grad_norm": 5.0577778816223145, "learning_rate": 7.94928335170893e-06, "loss": 1.2765, "step": 721 }, { "epoch": 0.19917241379310344, "grad_norm": 4.356962203979492, "learning_rate": 7.960308710033077e-06, "loss": 1.2427, "step": 722 }, { "epoch": 0.19944827586206895, "grad_norm": 4.599251747131348, "learning_rate": 7.971334068357223e-06, "loss": 1.304, "step": 723 }, { "epoch": 0.1997241379310345, "grad_norm": 4.418518543243408, "learning_rate": 7.982359426681368e-06, "loss": 1.2438, "step": 724 }, { "epoch": 0.2, "grad_norm": 4.42393684387207, "learning_rate": 7.993384785005514e-06, "loss": 1.34, "step": 725 }, { "epoch": 0.20027586206896553, "grad_norm": 4.24406099319458, "learning_rate": 8.004410143329658e-06, "loss": 1.3115, "step": 726 }, { "epoch": 0.20055172413793104, "grad_norm": 4.76524543762207, "learning_rate": 8.015435501653805e-06, "loss": 1.3736, "step": 727 }, { "epoch": 0.20082758620689656, "grad_norm": 4.251204013824463, "learning_rate": 8.026460859977951e-06, "loss": 1.2526, "step": 728 }, { "epoch": 0.20110344827586207, "grad_norm": 4.301199913024902, "learning_rate": 8.037486218302095e-06, "loss": 1.3111, "step": 729 }, { "epoch": 0.2013793103448276, "grad_norm": 4.491638660430908, "learning_rate": 8.048511576626242e-06, "loss": 1.4013, "step": 730 }, { "epoch": 0.2016551724137931, "grad_norm": 5.058411598205566, "learning_rate": 8.059536934950386e-06, "loss": 1.2265, "step": 731 }, { "epoch": 0.20193103448275862, "grad_norm": 5.143481731414795, "learning_rate": 8.070562293274532e-06, "loss": 1.3129, "step": 732 }, { "epoch": 0.20220689655172414, "grad_norm": 4.550878047943115, "learning_rate": 8.081587651598679e-06, "loss": 1.3641, "step": 733 }, { "epoch": 0.20248275862068965, "grad_norm": 5.18971586227417, "learning_rate": 8.092613009922823e-06, "loss": 1.3279, "step": 734 }, { "epoch": 0.20275862068965517, "grad_norm": 4.400355339050293, "learning_rate": 8.10363836824697e-06, "loss": 1.1714, "step": 735 }, { "epoch": 0.20303448275862068, "grad_norm": 4.8910651206970215, "learning_rate": 8.114663726571114e-06, "loss": 1.4128, "step": 736 }, { "epoch": 0.2033103448275862, "grad_norm": 3.9554426670074463, "learning_rate": 8.12568908489526e-06, "loss": 1.218, "step": 737 }, { "epoch": 0.2035862068965517, "grad_norm": 4.2595438957214355, "learning_rate": 8.136714443219406e-06, "loss": 1.1599, "step": 738 }, { "epoch": 0.20386206896551723, "grad_norm": 4.581275939941406, "learning_rate": 8.147739801543551e-06, "loss": 1.2715, "step": 739 }, { "epoch": 0.20413793103448277, "grad_norm": 4.762921333312988, "learning_rate": 8.158765159867697e-06, "loss": 1.2937, "step": 740 }, { "epoch": 0.2044137931034483, "grad_norm": 5.6876726150512695, "learning_rate": 8.169790518191842e-06, "loss": 1.3446, "step": 741 }, { "epoch": 0.2046896551724138, "grad_norm": 5.168393611907959, "learning_rate": 8.180815876515988e-06, "loss": 1.2948, "step": 742 }, { "epoch": 0.20496551724137932, "grad_norm": 4.356135845184326, "learning_rate": 8.191841234840133e-06, "loss": 1.2555, "step": 743 }, { "epoch": 0.20524137931034483, "grad_norm": 4.260981559753418, "learning_rate": 8.202866593164279e-06, "loss": 1.2919, "step": 744 }, { "epoch": 0.20551724137931035, "grad_norm": 4.687375068664551, "learning_rate": 8.213891951488423e-06, "loss": 1.1646, "step": 745 }, { "epoch": 0.20579310344827587, "grad_norm": 4.687869548797607, "learning_rate": 8.22491730981257e-06, "loss": 1.2873, "step": 746 }, { "epoch": 0.20606896551724138, "grad_norm": 4.970808982849121, "learning_rate": 8.235942668136716e-06, "loss": 1.3815, "step": 747 }, { "epoch": 0.2063448275862069, "grad_norm": 4.58160924911499, "learning_rate": 8.24696802646086e-06, "loss": 1.3539, "step": 748 }, { "epoch": 0.2066206896551724, "grad_norm": 4.469265460968018, "learning_rate": 8.257993384785007e-06, "loss": 1.3971, "step": 749 }, { "epoch": 0.20689655172413793, "grad_norm": 4.732211589813232, "learning_rate": 8.269018743109151e-06, "loss": 1.2777, "step": 750 }, { "epoch": 0.20717241379310344, "grad_norm": 4.34451961517334, "learning_rate": 8.280044101433297e-06, "loss": 1.2597, "step": 751 }, { "epoch": 0.20744827586206896, "grad_norm": 4.6255974769592285, "learning_rate": 8.291069459757442e-06, "loss": 1.3331, "step": 752 }, { "epoch": 0.20772413793103447, "grad_norm": 4.270122528076172, "learning_rate": 8.302094818081588e-06, "loss": 1.2645, "step": 753 }, { "epoch": 0.208, "grad_norm": 4.410320281982422, "learning_rate": 8.313120176405733e-06, "loss": 1.2786, "step": 754 }, { "epoch": 0.2082758620689655, "grad_norm": 5.12298059463501, "learning_rate": 8.324145534729879e-06, "loss": 1.3522, "step": 755 }, { "epoch": 0.20855172413793102, "grad_norm": 4.339027404785156, "learning_rate": 8.335170893054025e-06, "loss": 1.434, "step": 756 }, { "epoch": 0.20882758620689656, "grad_norm": 6.769815444946289, "learning_rate": 8.34619625137817e-06, "loss": 1.4499, "step": 757 }, { "epoch": 0.20910344827586208, "grad_norm": 3.925297260284424, "learning_rate": 8.357221609702316e-06, "loss": 1.0721, "step": 758 }, { "epoch": 0.2093793103448276, "grad_norm": 4.3660101890563965, "learning_rate": 8.36824696802646e-06, "loss": 1.2351, "step": 759 }, { "epoch": 0.2096551724137931, "grad_norm": 4.173503875732422, "learning_rate": 8.379272326350607e-06, "loss": 1.2172, "step": 760 }, { "epoch": 0.20993103448275863, "grad_norm": 4.757811069488525, "learning_rate": 8.390297684674753e-06, "loss": 1.3409, "step": 761 }, { "epoch": 0.21020689655172414, "grad_norm": 4.3970537185668945, "learning_rate": 8.401323042998897e-06, "loss": 1.3036, "step": 762 }, { "epoch": 0.21048275862068966, "grad_norm": 4.237130641937256, "learning_rate": 8.412348401323044e-06, "loss": 1.1994, "step": 763 }, { "epoch": 0.21075862068965517, "grad_norm": 4.63571310043335, "learning_rate": 8.423373759647188e-06, "loss": 1.3928, "step": 764 }, { "epoch": 0.2110344827586207, "grad_norm": 5.089085102081299, "learning_rate": 8.434399117971334e-06, "loss": 1.4577, "step": 765 }, { "epoch": 0.2113103448275862, "grad_norm": 4.334248065948486, "learning_rate": 8.44542447629548e-06, "loss": 1.2537, "step": 766 }, { "epoch": 0.21158620689655172, "grad_norm": 4.7747087478637695, "learning_rate": 8.456449834619625e-06, "loss": 1.3864, "step": 767 }, { "epoch": 0.21186206896551724, "grad_norm": 5.117302894592285, "learning_rate": 8.467475192943771e-06, "loss": 1.3858, "step": 768 }, { "epoch": 0.21213793103448275, "grad_norm": 4.831068515777588, "learning_rate": 8.478500551267916e-06, "loss": 1.3242, "step": 769 }, { "epoch": 0.21241379310344827, "grad_norm": 4.235820293426514, "learning_rate": 8.489525909592062e-06, "loss": 1.2475, "step": 770 }, { "epoch": 0.21268965517241378, "grad_norm": 4.543394565582275, "learning_rate": 8.500551267916208e-06, "loss": 1.3828, "step": 771 }, { "epoch": 0.2129655172413793, "grad_norm": 4.849647521972656, "learning_rate": 8.511576626240353e-06, "loss": 1.2963, "step": 772 }, { "epoch": 0.21324137931034481, "grad_norm": 4.604733943939209, "learning_rate": 8.522601984564499e-06, "loss": 1.1977, "step": 773 }, { "epoch": 0.21351724137931036, "grad_norm": 4.5547099113464355, "learning_rate": 8.533627342888644e-06, "loss": 1.3011, "step": 774 }, { "epoch": 0.21379310344827587, "grad_norm": 4.524393081665039, "learning_rate": 8.54465270121279e-06, "loss": 1.4393, "step": 775 }, { "epoch": 0.2140689655172414, "grad_norm": 4.496883392333984, "learning_rate": 8.555678059536936e-06, "loss": 1.1582, "step": 776 }, { "epoch": 0.2143448275862069, "grad_norm": 4.277092933654785, "learning_rate": 8.56670341786108e-06, "loss": 1.3574, "step": 777 }, { "epoch": 0.21462068965517242, "grad_norm": 4.967523574829102, "learning_rate": 8.577728776185227e-06, "loss": 1.4189, "step": 778 }, { "epoch": 0.21489655172413794, "grad_norm": 4.53342866897583, "learning_rate": 8.588754134509373e-06, "loss": 1.3288, "step": 779 }, { "epoch": 0.21517241379310345, "grad_norm": 4.239219665527344, "learning_rate": 8.599779492833518e-06, "loss": 1.2846, "step": 780 }, { "epoch": 0.21544827586206897, "grad_norm": 4.149988174438477, "learning_rate": 8.610804851157664e-06, "loss": 1.2177, "step": 781 }, { "epoch": 0.21572413793103448, "grad_norm": 5.007024765014648, "learning_rate": 8.621830209481808e-06, "loss": 1.2599, "step": 782 }, { "epoch": 0.216, "grad_norm": 4.833620071411133, "learning_rate": 8.632855567805955e-06, "loss": 1.3758, "step": 783 }, { "epoch": 0.2162758620689655, "grad_norm": 4.365668296813965, "learning_rate": 8.643880926130101e-06, "loss": 1.2118, "step": 784 }, { "epoch": 0.21655172413793103, "grad_norm": 4.704110622406006, "learning_rate": 8.654906284454245e-06, "loss": 1.4405, "step": 785 }, { "epoch": 0.21682758620689654, "grad_norm": 4.215883731842041, "learning_rate": 8.665931642778392e-06, "loss": 1.1919, "step": 786 }, { "epoch": 0.21710344827586206, "grad_norm": 4.60099983215332, "learning_rate": 8.676957001102536e-06, "loss": 1.161, "step": 787 }, { "epoch": 0.21737931034482758, "grad_norm": 4.741946220397949, "learning_rate": 8.687982359426682e-06, "loss": 1.2082, "step": 788 }, { "epoch": 0.2176551724137931, "grad_norm": 4.4741692543029785, "learning_rate": 8.699007717750829e-06, "loss": 1.3407, "step": 789 }, { "epoch": 0.21793103448275863, "grad_norm": 4.580493450164795, "learning_rate": 8.710033076074973e-06, "loss": 1.3422, "step": 790 }, { "epoch": 0.21820689655172415, "grad_norm": 4.4053568840026855, "learning_rate": 8.72105843439912e-06, "loss": 1.3015, "step": 791 }, { "epoch": 0.21848275862068967, "grad_norm": 4.861225605010986, "learning_rate": 8.732083792723264e-06, "loss": 1.2213, "step": 792 }, { "epoch": 0.21875862068965518, "grad_norm": 4.685634613037109, "learning_rate": 8.74310915104741e-06, "loss": 1.3085, "step": 793 }, { "epoch": 0.2190344827586207, "grad_norm": 5.322746753692627, "learning_rate": 8.754134509371556e-06, "loss": 1.2446, "step": 794 }, { "epoch": 0.2193103448275862, "grad_norm": 4.522305011749268, "learning_rate": 8.765159867695701e-06, "loss": 1.2896, "step": 795 }, { "epoch": 0.21958620689655173, "grad_norm": 4.383864879608154, "learning_rate": 8.776185226019847e-06, "loss": 1.3536, "step": 796 }, { "epoch": 0.21986206896551724, "grad_norm": 4.516895294189453, "learning_rate": 8.787210584343992e-06, "loss": 1.2378, "step": 797 }, { "epoch": 0.22013793103448276, "grad_norm": 4.288947105407715, "learning_rate": 8.798235942668138e-06, "loss": 1.1864, "step": 798 }, { "epoch": 0.22041379310344827, "grad_norm": 4.256165981292725, "learning_rate": 8.809261300992284e-06, "loss": 1.1749, "step": 799 }, { "epoch": 0.2206896551724138, "grad_norm": 4.7644219398498535, "learning_rate": 8.820286659316429e-06, "loss": 1.2114, "step": 800 }, { "epoch": 0.2209655172413793, "grad_norm": 4.358612060546875, "learning_rate": 8.831312017640575e-06, "loss": 1.3418, "step": 801 }, { "epoch": 0.22124137931034482, "grad_norm": 4.552274703979492, "learning_rate": 8.84233737596472e-06, "loss": 1.306, "step": 802 }, { "epoch": 0.22151724137931034, "grad_norm": 4.402317523956299, "learning_rate": 8.853362734288866e-06, "loss": 1.252, "step": 803 }, { "epoch": 0.22179310344827585, "grad_norm": 5.114389896392822, "learning_rate": 8.86438809261301e-06, "loss": 1.2622, "step": 804 }, { "epoch": 0.22206896551724137, "grad_norm": 4.10305643081665, "learning_rate": 8.875413450937156e-06, "loss": 1.2065, "step": 805 }, { "epoch": 0.22234482758620688, "grad_norm": 4.624920845031738, "learning_rate": 8.886438809261303e-06, "loss": 1.3583, "step": 806 }, { "epoch": 0.22262068965517243, "grad_norm": 4.914443492889404, "learning_rate": 8.897464167585447e-06, "loss": 1.3709, "step": 807 }, { "epoch": 0.22289655172413794, "grad_norm": 4.426845550537109, "learning_rate": 8.908489525909593e-06, "loss": 1.4061, "step": 808 }, { "epoch": 0.22317241379310346, "grad_norm": 4.24751615524292, "learning_rate": 8.919514884233738e-06, "loss": 1.1703, "step": 809 }, { "epoch": 0.22344827586206897, "grad_norm": 4.89431619644165, "learning_rate": 8.930540242557884e-06, "loss": 1.5375, "step": 810 }, { "epoch": 0.2237241379310345, "grad_norm": 4.498164176940918, "learning_rate": 8.941565600882029e-06, "loss": 1.1906, "step": 811 }, { "epoch": 0.224, "grad_norm": 4.3459062576293945, "learning_rate": 8.952590959206175e-06, "loss": 1.408, "step": 812 }, { "epoch": 0.22427586206896552, "grad_norm": 4.3803486824035645, "learning_rate": 8.96361631753032e-06, "loss": 1.4056, "step": 813 }, { "epoch": 0.22455172413793104, "grad_norm": 4.838947296142578, "learning_rate": 8.974641675854466e-06, "loss": 1.5109, "step": 814 }, { "epoch": 0.22482758620689655, "grad_norm": 4.559952735900879, "learning_rate": 8.985667034178612e-06, "loss": 1.2422, "step": 815 }, { "epoch": 0.22510344827586207, "grad_norm": 4.2517900466918945, "learning_rate": 8.996692392502757e-06, "loss": 1.2239, "step": 816 }, { "epoch": 0.22537931034482758, "grad_norm": 4.933710098266602, "learning_rate": 9.007717750826903e-06, "loss": 1.3277, "step": 817 }, { "epoch": 0.2256551724137931, "grad_norm": 4.069960117340088, "learning_rate": 9.018743109151047e-06, "loss": 1.3653, "step": 818 }, { "epoch": 0.2259310344827586, "grad_norm": 4.455365180969238, "learning_rate": 9.029768467475194e-06, "loss": 1.4954, "step": 819 }, { "epoch": 0.22620689655172413, "grad_norm": 4.3891730308532715, "learning_rate": 9.040793825799338e-06, "loss": 1.2467, "step": 820 }, { "epoch": 0.22648275862068964, "grad_norm": 4.190471649169922, "learning_rate": 9.051819184123484e-06, "loss": 1.1742, "step": 821 }, { "epoch": 0.22675862068965516, "grad_norm": 4.491499423980713, "learning_rate": 9.06284454244763e-06, "loss": 1.1788, "step": 822 }, { "epoch": 0.22703448275862068, "grad_norm": 5.008545875549316, "learning_rate": 9.073869900771775e-06, "loss": 1.3977, "step": 823 }, { "epoch": 0.22731034482758622, "grad_norm": 4.294487476348877, "learning_rate": 9.084895259095921e-06, "loss": 1.2411, "step": 824 }, { "epoch": 0.22758620689655173, "grad_norm": 6.707801342010498, "learning_rate": 9.095920617420066e-06, "loss": 1.2234, "step": 825 }, { "epoch": 0.22786206896551725, "grad_norm": 5.616009712219238, "learning_rate": 9.106945975744212e-06, "loss": 1.2098, "step": 826 }, { "epoch": 0.22813793103448277, "grad_norm": 4.5771050453186035, "learning_rate": 9.117971334068358e-06, "loss": 1.2788, "step": 827 }, { "epoch": 0.22841379310344828, "grad_norm": 4.628068923950195, "learning_rate": 9.128996692392503e-06, "loss": 1.2849, "step": 828 }, { "epoch": 0.2286896551724138, "grad_norm": 4.496346473693848, "learning_rate": 9.140022050716649e-06, "loss": 1.2808, "step": 829 }, { "epoch": 0.2289655172413793, "grad_norm": 4.52477502822876, "learning_rate": 9.151047409040794e-06, "loss": 1.2958, "step": 830 }, { "epoch": 0.22924137931034483, "grad_norm": 4.2242431640625, "learning_rate": 9.16207276736494e-06, "loss": 1.3365, "step": 831 }, { "epoch": 0.22951724137931034, "grad_norm": 4.227226734161377, "learning_rate": 9.173098125689086e-06, "loss": 1.1861, "step": 832 }, { "epoch": 0.22979310344827586, "grad_norm": 4.7039079666137695, "learning_rate": 9.18412348401323e-06, "loss": 1.2269, "step": 833 }, { "epoch": 0.23006896551724138, "grad_norm": 5.1389265060424805, "learning_rate": 9.195148842337377e-06, "loss": 1.459, "step": 834 }, { "epoch": 0.2303448275862069, "grad_norm": 4.630491733551025, "learning_rate": 9.206174200661521e-06, "loss": 1.3411, "step": 835 }, { "epoch": 0.2306206896551724, "grad_norm": 4.3410964012146, "learning_rate": 9.217199558985668e-06, "loss": 1.3113, "step": 836 }, { "epoch": 0.23089655172413792, "grad_norm": 4.252562046051025, "learning_rate": 9.228224917309814e-06, "loss": 1.2648, "step": 837 }, { "epoch": 0.23117241379310344, "grad_norm": 4.17257833480835, "learning_rate": 9.239250275633958e-06, "loss": 1.4046, "step": 838 }, { "epoch": 0.23144827586206895, "grad_norm": 4.963825225830078, "learning_rate": 9.250275633958105e-06, "loss": 1.3363, "step": 839 }, { "epoch": 0.2317241379310345, "grad_norm": 4.2138671875, "learning_rate": 9.261300992282249e-06, "loss": 1.3159, "step": 840 }, { "epoch": 0.232, "grad_norm": 5.016463756561279, "learning_rate": 9.272326350606395e-06, "loss": 1.288, "step": 841 }, { "epoch": 0.23227586206896553, "grad_norm": 4.632814884185791, "learning_rate": 9.283351708930542e-06, "loss": 1.2663, "step": 842 }, { "epoch": 0.23255172413793104, "grad_norm": 4.500759601593018, "learning_rate": 9.294377067254686e-06, "loss": 1.3716, "step": 843 }, { "epoch": 0.23282758620689656, "grad_norm": 4.631801128387451, "learning_rate": 9.305402425578832e-06, "loss": 1.3463, "step": 844 }, { "epoch": 0.23310344827586207, "grad_norm": 4.383293628692627, "learning_rate": 9.316427783902977e-06, "loss": 1.396, "step": 845 }, { "epoch": 0.2333793103448276, "grad_norm": 4.548886775970459, "learning_rate": 9.327453142227123e-06, "loss": 1.2654, "step": 846 }, { "epoch": 0.2336551724137931, "grad_norm": 4.4236650466918945, "learning_rate": 9.33847850055127e-06, "loss": 1.2416, "step": 847 }, { "epoch": 0.23393103448275862, "grad_norm": 4.831140041351318, "learning_rate": 9.349503858875414e-06, "loss": 1.2764, "step": 848 }, { "epoch": 0.23420689655172414, "grad_norm": 4.431097984313965, "learning_rate": 9.36052921719956e-06, "loss": 1.3915, "step": 849 }, { "epoch": 0.23448275862068965, "grad_norm": 4.269824504852295, "learning_rate": 9.371554575523705e-06, "loss": 1.4026, "step": 850 }, { "epoch": 0.23475862068965517, "grad_norm": 4.261257171630859, "learning_rate": 9.382579933847851e-06, "loss": 1.211, "step": 851 }, { "epoch": 0.23503448275862068, "grad_norm": 4.069828033447266, "learning_rate": 9.393605292171997e-06, "loss": 1.3305, "step": 852 }, { "epoch": 0.2353103448275862, "grad_norm": 4.681112766265869, "learning_rate": 9.404630650496142e-06, "loss": 1.2795, "step": 853 }, { "epoch": 0.23558620689655171, "grad_norm": 4.249982833862305, "learning_rate": 9.415656008820288e-06, "loss": 1.2524, "step": 854 }, { "epoch": 0.23586206896551723, "grad_norm": 4.545660495758057, "learning_rate": 9.426681367144434e-06, "loss": 1.4119, "step": 855 }, { "epoch": 0.23613793103448275, "grad_norm": 4.409581661224365, "learning_rate": 9.437706725468579e-06, "loss": 1.3378, "step": 856 }, { "epoch": 0.2364137931034483, "grad_norm": 4.430081367492676, "learning_rate": 9.448732083792725e-06, "loss": 1.3157, "step": 857 }, { "epoch": 0.2366896551724138, "grad_norm": 4.7472453117370605, "learning_rate": 9.45975744211687e-06, "loss": 1.3573, "step": 858 }, { "epoch": 0.23696551724137932, "grad_norm": 4.594561576843262, "learning_rate": 9.470782800441016e-06, "loss": 1.1928, "step": 859 }, { "epoch": 0.23724137931034484, "grad_norm": 4.235453128814697, "learning_rate": 9.481808158765162e-06, "loss": 1.1932, "step": 860 }, { "epoch": 0.23751724137931035, "grad_norm": 4.17944860458374, "learning_rate": 9.492833517089306e-06, "loss": 1.2217, "step": 861 }, { "epoch": 0.23779310344827587, "grad_norm": 4.820381164550781, "learning_rate": 9.503858875413453e-06, "loss": 1.4139, "step": 862 }, { "epoch": 0.23806896551724138, "grad_norm": 4.869057655334473, "learning_rate": 9.514884233737597e-06, "loss": 1.359, "step": 863 }, { "epoch": 0.2383448275862069, "grad_norm": 5.277398109436035, "learning_rate": 9.525909592061743e-06, "loss": 1.4, "step": 864 }, { "epoch": 0.2386206896551724, "grad_norm": 4.754694938659668, "learning_rate": 9.53693495038589e-06, "loss": 1.2908, "step": 865 }, { "epoch": 0.23889655172413793, "grad_norm": 4.350883483886719, "learning_rate": 9.547960308710034e-06, "loss": 1.2923, "step": 866 }, { "epoch": 0.23917241379310344, "grad_norm": 4.550937175750732, "learning_rate": 9.55898566703418e-06, "loss": 1.325, "step": 867 }, { "epoch": 0.23944827586206896, "grad_norm": 4.845820426940918, "learning_rate": 9.570011025358325e-06, "loss": 1.412, "step": 868 }, { "epoch": 0.23972413793103448, "grad_norm": 5.078804016113281, "learning_rate": 9.581036383682471e-06, "loss": 1.4114, "step": 869 }, { "epoch": 0.24, "grad_norm": 4.653894424438477, "learning_rate": 9.592061742006616e-06, "loss": 1.3929, "step": 870 }, { "epoch": 0.2402758620689655, "grad_norm": 4.238353252410889, "learning_rate": 9.603087100330762e-06, "loss": 1.2486, "step": 871 }, { "epoch": 0.24055172413793102, "grad_norm": 4.494268894195557, "learning_rate": 9.614112458654906e-06, "loss": 1.2822, "step": 872 }, { "epoch": 0.24082758620689657, "grad_norm": 4.403338432312012, "learning_rate": 9.625137816979053e-06, "loss": 1.4191, "step": 873 }, { "epoch": 0.24110344827586208, "grad_norm": 4.122784614562988, "learning_rate": 9.636163175303199e-06, "loss": 1.3127, "step": 874 }, { "epoch": 0.2413793103448276, "grad_norm": 4.486606597900391, "learning_rate": 9.647188533627343e-06, "loss": 1.2034, "step": 875 }, { "epoch": 0.2416551724137931, "grad_norm": 4.295325756072998, "learning_rate": 9.65821389195149e-06, "loss": 1.1842, "step": 876 }, { "epoch": 0.24193103448275863, "grad_norm": 4.376078128814697, "learning_rate": 9.669239250275634e-06, "loss": 1.3024, "step": 877 }, { "epoch": 0.24220689655172414, "grad_norm": 4.400404453277588, "learning_rate": 9.68026460859978e-06, "loss": 1.264, "step": 878 }, { "epoch": 0.24248275862068966, "grad_norm": 4.4503703117370605, "learning_rate": 9.691289966923925e-06, "loss": 1.3261, "step": 879 }, { "epoch": 0.24275862068965517, "grad_norm": 4.705063343048096, "learning_rate": 9.702315325248071e-06, "loss": 1.3423, "step": 880 }, { "epoch": 0.2430344827586207, "grad_norm": 4.376526832580566, "learning_rate": 9.713340683572216e-06, "loss": 1.2647, "step": 881 }, { "epoch": 0.2433103448275862, "grad_norm": 4.684849739074707, "learning_rate": 9.724366041896362e-06, "loss": 1.542, "step": 882 }, { "epoch": 0.24358620689655172, "grad_norm": 5.3763885498046875, "learning_rate": 9.735391400220508e-06, "loss": 1.3675, "step": 883 }, { "epoch": 0.24386206896551724, "grad_norm": 4.846948146820068, "learning_rate": 9.746416758544653e-06, "loss": 1.3494, "step": 884 }, { "epoch": 0.24413793103448275, "grad_norm": 4.327201843261719, "learning_rate": 9.757442116868799e-06, "loss": 1.2868, "step": 885 }, { "epoch": 0.24441379310344827, "grad_norm": 4.36176061630249, "learning_rate": 9.768467475192944e-06, "loss": 1.2397, "step": 886 }, { "epoch": 0.24468965517241378, "grad_norm": 4.3260602951049805, "learning_rate": 9.77949283351709e-06, "loss": 1.274, "step": 887 }, { "epoch": 0.2449655172413793, "grad_norm": 4.234586715698242, "learning_rate": 9.790518191841234e-06, "loss": 1.2049, "step": 888 }, { "epoch": 0.24524137931034481, "grad_norm": 4.2607927322387695, "learning_rate": 9.80154355016538e-06, "loss": 1.352, "step": 889 }, { "epoch": 0.24551724137931036, "grad_norm": 4.0316481590271, "learning_rate": 9.812568908489527e-06, "loss": 1.1318, "step": 890 }, { "epoch": 0.24579310344827587, "grad_norm": 5.02911376953125, "learning_rate": 9.823594266813671e-06, "loss": 1.4165, "step": 891 }, { "epoch": 0.2460689655172414, "grad_norm": 4.100216865539551, "learning_rate": 9.834619625137818e-06, "loss": 1.1847, "step": 892 }, { "epoch": 0.2463448275862069, "grad_norm": 4.349404811859131, "learning_rate": 9.845644983461964e-06, "loss": 1.1842, "step": 893 }, { "epoch": 0.24662068965517242, "grad_norm": 4.507713317871094, "learning_rate": 9.856670341786108e-06, "loss": 1.4089, "step": 894 }, { "epoch": 0.24689655172413794, "grad_norm": 4.629605293273926, "learning_rate": 9.867695700110255e-06, "loss": 1.1417, "step": 895 }, { "epoch": 0.24717241379310345, "grad_norm": 4.726772785186768, "learning_rate": 9.878721058434399e-06, "loss": 1.5142, "step": 896 }, { "epoch": 0.24744827586206897, "grad_norm": 4.510002613067627, "learning_rate": 9.889746416758545e-06, "loss": 1.4022, "step": 897 }, { "epoch": 0.24772413793103448, "grad_norm": 4.610316276550293, "learning_rate": 9.900771775082692e-06, "loss": 1.3998, "step": 898 }, { "epoch": 0.248, "grad_norm": 4.159575939178467, "learning_rate": 9.911797133406836e-06, "loss": 1.2744, "step": 899 }, { "epoch": 0.2482758620689655, "grad_norm": 4.661200523376465, "learning_rate": 9.922822491730982e-06, "loss": 1.3294, "step": 900 }, { "epoch": 0.24855172413793103, "grad_norm": 4.492237567901611, "learning_rate": 9.933847850055127e-06, "loss": 1.41, "step": 901 }, { "epoch": 0.24882758620689654, "grad_norm": 4.562465190887451, "learning_rate": 9.944873208379273e-06, "loss": 1.4313, "step": 902 }, { "epoch": 0.24910344827586206, "grad_norm": 4.251619815826416, "learning_rate": 9.95589856670342e-06, "loss": 1.2459, "step": 903 }, { "epoch": 0.24937931034482758, "grad_norm": 4.6402692794799805, "learning_rate": 9.966923925027564e-06, "loss": 1.2503, "step": 904 }, { "epoch": 0.2496551724137931, "grad_norm": 4.796824932098389, "learning_rate": 9.97794928335171e-06, "loss": 1.2046, "step": 905 }, { "epoch": 0.2499310344827586, "grad_norm": 4.636838912963867, "learning_rate": 9.988974641675855e-06, "loss": 1.4087, "step": 906 }, { "epoch": 0.2502068965517241, "grad_norm": 4.566991806030273, "learning_rate": 1e-05, "loss": 1.2032, "step": 907 }, { "epoch": 0.25048275862068964, "grad_norm": 4.621230125427246, "learning_rate": 9.999999916771063e-06, "loss": 1.4097, "step": 908 }, { "epoch": 0.25075862068965515, "grad_norm": 5.113875865936279, "learning_rate": 9.999999667084257e-06, "loss": 1.3906, "step": 909 }, { "epoch": 0.25103448275862067, "grad_norm": 4.788361072540283, "learning_rate": 9.999999250939587e-06, "loss": 1.3156, "step": 910 }, { "epoch": 0.2513103448275862, "grad_norm": 4.366026401519775, "learning_rate": 9.99999866833707e-06, "loss": 1.2499, "step": 911 }, { "epoch": 0.2515862068965517, "grad_norm": 4.639055252075195, "learning_rate": 9.999997919276724e-06, "loss": 1.2263, "step": 912 }, { "epoch": 0.2518620689655172, "grad_norm": 4.921482086181641, "learning_rate": 9.999997003758572e-06, "loss": 1.5084, "step": 913 }, { "epoch": 0.25213793103448273, "grad_norm": 4.63786506652832, "learning_rate": 9.99999592178265e-06, "loss": 1.3765, "step": 914 }, { "epoch": 0.2524137931034483, "grad_norm": 4.475543975830078, "learning_rate": 9.999994673348986e-06, "loss": 1.3521, "step": 915 }, { "epoch": 0.2526896551724138, "grad_norm": 4.344749927520752, "learning_rate": 9.99999325845763e-06, "loss": 1.2684, "step": 916 }, { "epoch": 0.25296551724137933, "grad_norm": 4.353376865386963, "learning_rate": 9.999991677108625e-06, "loss": 1.3317, "step": 917 }, { "epoch": 0.25324137931034485, "grad_norm": 4.130482196807861, "learning_rate": 9.999989929302022e-06, "loss": 1.2064, "step": 918 }, { "epoch": 0.25351724137931037, "grad_norm": 4.171759128570557, "learning_rate": 9.999988015037882e-06, "loss": 1.1587, "step": 919 }, { "epoch": 0.2537931034482759, "grad_norm": 4.191483020782471, "learning_rate": 9.999985934316267e-06, "loss": 1.2824, "step": 920 }, { "epoch": 0.2540689655172414, "grad_norm": 4.1153669357299805, "learning_rate": 9.999983687137248e-06, "loss": 1.2261, "step": 921 }, { "epoch": 0.2543448275862069, "grad_norm": 4.701594829559326, "learning_rate": 9.999981273500898e-06, "loss": 1.4137, "step": 922 }, { "epoch": 0.2546206896551724, "grad_norm": 4.824914932250977, "learning_rate": 9.999978693407299e-06, "loss": 1.4727, "step": 923 }, { "epoch": 0.25489655172413794, "grad_norm": 4.3003668785095215, "learning_rate": 9.999975946856535e-06, "loss": 1.1747, "step": 924 }, { "epoch": 0.25517241379310346, "grad_norm": 4.352760314941406, "learning_rate": 9.9999730338487e-06, "loss": 1.2458, "step": 925 }, { "epoch": 0.255448275862069, "grad_norm": 4.45680570602417, "learning_rate": 9.999969954383888e-06, "loss": 1.2489, "step": 926 }, { "epoch": 0.2557241379310345, "grad_norm": 4.40629243850708, "learning_rate": 9.999966708462204e-06, "loss": 1.2121, "step": 927 }, { "epoch": 0.256, "grad_norm": 4.291517734527588, "learning_rate": 9.999963296083754e-06, "loss": 1.3619, "step": 928 }, { "epoch": 0.2562758620689655, "grad_norm": 4.661025524139404, "learning_rate": 9.999959717248654e-06, "loss": 1.3786, "step": 929 }, { "epoch": 0.25655172413793104, "grad_norm": 4.163248062133789, "learning_rate": 9.99995597195702e-06, "loss": 1.3234, "step": 930 }, { "epoch": 0.25682758620689655, "grad_norm": 4.400679111480713, "learning_rate": 9.99995206020898e-06, "loss": 1.3184, "step": 931 }, { "epoch": 0.25710344827586207, "grad_norm": 4.162512302398682, "learning_rate": 9.999947982004663e-06, "loss": 1.2414, "step": 932 }, { "epoch": 0.2573793103448276, "grad_norm": 4.196034908294678, "learning_rate": 9.999943737344205e-06, "loss": 1.2279, "step": 933 }, { "epoch": 0.2576551724137931, "grad_norm": 4.111690998077393, "learning_rate": 9.999939326227745e-06, "loss": 1.3326, "step": 934 }, { "epoch": 0.2579310344827586, "grad_norm": 4.15796422958374, "learning_rate": 9.999934748655432e-06, "loss": 1.2349, "step": 935 }, { "epoch": 0.25820689655172413, "grad_norm": 4.130764007568359, "learning_rate": 9.999930004627418e-06, "loss": 1.3663, "step": 936 }, { "epoch": 0.25848275862068965, "grad_norm": 4.326107501983643, "learning_rate": 9.999925094143864e-06, "loss": 1.3137, "step": 937 }, { "epoch": 0.25875862068965516, "grad_norm": 6.758279323577881, "learning_rate": 9.999920017204927e-06, "loss": 1.3776, "step": 938 }, { "epoch": 0.2590344827586207, "grad_norm": 4.279474258422852, "learning_rate": 9.99991477381078e-06, "loss": 1.4416, "step": 939 }, { "epoch": 0.2593103448275862, "grad_norm": 4.45224142074585, "learning_rate": 9.999909363961597e-06, "loss": 1.2726, "step": 940 }, { "epoch": 0.2595862068965517, "grad_norm": 4.5109663009643555, "learning_rate": 9.99990378765756e-06, "loss": 1.3206, "step": 941 }, { "epoch": 0.2598620689655172, "grad_norm": 4.4554219245910645, "learning_rate": 9.999898044898852e-06, "loss": 1.3336, "step": 942 }, { "epoch": 0.26013793103448274, "grad_norm": 4.857309341430664, "learning_rate": 9.999892135685664e-06, "loss": 1.3672, "step": 943 }, { "epoch": 0.26041379310344825, "grad_norm": 4.008201599121094, "learning_rate": 9.999886060018194e-06, "loss": 1.2733, "step": 944 }, { "epoch": 0.26068965517241377, "grad_norm": 4.54946231842041, "learning_rate": 9.999879817896645e-06, "loss": 1.2705, "step": 945 }, { "epoch": 0.2609655172413793, "grad_norm": 4.67586612701416, "learning_rate": 9.999873409321223e-06, "loss": 1.3766, "step": 946 }, { "epoch": 0.2612413793103448, "grad_norm": 4.214037895202637, "learning_rate": 9.999866834292143e-06, "loss": 1.2439, "step": 947 }, { "epoch": 0.2615172413793104, "grad_norm": 4.317086696624756, "learning_rate": 9.999860092809621e-06, "loss": 1.3058, "step": 948 }, { "epoch": 0.2617931034482759, "grad_norm": 5.442924976348877, "learning_rate": 9.999853184873886e-06, "loss": 1.3842, "step": 949 }, { "epoch": 0.2620689655172414, "grad_norm": 4.2230753898620605, "learning_rate": 9.999846110485165e-06, "loss": 1.3384, "step": 950 }, { "epoch": 0.2623448275862069, "grad_norm": 4.512312889099121, "learning_rate": 9.999838869643691e-06, "loss": 1.3118, "step": 951 }, { "epoch": 0.26262068965517243, "grad_norm": 4.191985607147217, "learning_rate": 9.999831462349712e-06, "loss": 1.3171, "step": 952 }, { "epoch": 0.26289655172413795, "grad_norm": 4.1891279220581055, "learning_rate": 9.999823888603468e-06, "loss": 1.3326, "step": 953 }, { "epoch": 0.26317241379310347, "grad_norm": 4.7140936851501465, "learning_rate": 9.999816148405215e-06, "loss": 1.3807, "step": 954 }, { "epoch": 0.263448275862069, "grad_norm": 4.243679523468018, "learning_rate": 9.99980824175521e-06, "loss": 1.2669, "step": 955 }, { "epoch": 0.2637241379310345, "grad_norm": 4.219418525695801, "learning_rate": 9.999800168653715e-06, "loss": 1.3349, "step": 956 }, { "epoch": 0.264, "grad_norm": 4.283151626586914, "learning_rate": 9.999791929101e-06, "loss": 1.314, "step": 957 }, { "epoch": 0.26427586206896553, "grad_norm": 4.375310897827148, "learning_rate": 9.999783523097337e-06, "loss": 1.2928, "step": 958 }, { "epoch": 0.26455172413793104, "grad_norm": 4.163501739501953, "learning_rate": 9.99977495064301e-06, "loss": 1.205, "step": 959 }, { "epoch": 0.26482758620689656, "grad_norm": 4.494958400726318, "learning_rate": 9.9997662117383e-06, "loss": 1.4393, "step": 960 }, { "epoch": 0.2651034482758621, "grad_norm": 4.134310722351074, "learning_rate": 9.9997573063835e-06, "loss": 1.2869, "step": 961 }, { "epoch": 0.2653793103448276, "grad_norm": 4.729069709777832, "learning_rate": 9.999748234578908e-06, "loss": 1.4201, "step": 962 }, { "epoch": 0.2656551724137931, "grad_norm": 4.171117305755615, "learning_rate": 9.999738996324822e-06, "loss": 1.3437, "step": 963 }, { "epoch": 0.2659310344827586, "grad_norm": 3.983511447906494, "learning_rate": 9.999729591621554e-06, "loss": 1.1712, "step": 964 }, { "epoch": 0.26620689655172414, "grad_norm": 3.8852880001068115, "learning_rate": 9.999720020469415e-06, "loss": 1.206, "step": 965 }, { "epoch": 0.26648275862068965, "grad_norm": 4.318922996520996, "learning_rate": 9.999710282868724e-06, "loss": 1.4171, "step": 966 }, { "epoch": 0.26675862068965517, "grad_norm": 3.8867180347442627, "learning_rate": 9.999700378819804e-06, "loss": 1.2845, "step": 967 }, { "epoch": 0.2670344827586207, "grad_norm": 4.525215148925781, "learning_rate": 9.999690308322984e-06, "loss": 1.5536, "step": 968 }, { "epoch": 0.2673103448275862, "grad_norm": 4.594872951507568, "learning_rate": 9.999680071378603e-06, "loss": 1.473, "step": 969 }, { "epoch": 0.2675862068965517, "grad_norm": 3.8205535411834717, "learning_rate": 9.999669667987e-06, "loss": 1.2617, "step": 970 }, { "epoch": 0.26786206896551723, "grad_norm": 4.646821022033691, "learning_rate": 9.99965909814852e-06, "loss": 1.3297, "step": 971 }, { "epoch": 0.26813793103448275, "grad_norm": 4.354177951812744, "learning_rate": 9.999648361863517e-06, "loss": 1.3644, "step": 972 }, { "epoch": 0.26841379310344826, "grad_norm": 4.093743324279785, "learning_rate": 9.999637459132345e-06, "loss": 1.3664, "step": 973 }, { "epoch": 0.2686896551724138, "grad_norm": 4.10508394241333, "learning_rate": 9.999626389955373e-06, "loss": 1.1879, "step": 974 }, { "epoch": 0.2689655172413793, "grad_norm": 4.395421028137207, "learning_rate": 9.999615154332962e-06, "loss": 1.3061, "step": 975 }, { "epoch": 0.2692413793103448, "grad_norm": 4.398074626922607, "learning_rate": 9.999603752265491e-06, "loss": 1.1879, "step": 976 }, { "epoch": 0.2695172413793103, "grad_norm": 4.477128028869629, "learning_rate": 9.99959218375334e-06, "loss": 1.3674, "step": 977 }, { "epoch": 0.26979310344827584, "grad_norm": 4.198464393615723, "learning_rate": 9.999580448796892e-06, "loss": 1.2974, "step": 978 }, { "epoch": 0.27006896551724136, "grad_norm": 4.996552467346191, "learning_rate": 9.999568547396539e-06, "loss": 1.363, "step": 979 }, { "epoch": 0.27034482758620687, "grad_norm": 4.21674108505249, "learning_rate": 9.999556479552675e-06, "loss": 1.1607, "step": 980 }, { "epoch": 0.2706206896551724, "grad_norm": 4.126073837280273, "learning_rate": 9.999544245265703e-06, "loss": 1.4271, "step": 981 }, { "epoch": 0.27089655172413796, "grad_norm": 4.109383583068848, "learning_rate": 9.999531844536033e-06, "loss": 1.2024, "step": 982 }, { "epoch": 0.2711724137931035, "grad_norm": 4.021079063415527, "learning_rate": 9.999519277364071e-06, "loss": 1.1088, "step": 983 }, { "epoch": 0.271448275862069, "grad_norm": 3.9247796535491943, "learning_rate": 9.999506543750244e-06, "loss": 1.0881, "step": 984 }, { "epoch": 0.2717241379310345, "grad_norm": 4.376382350921631, "learning_rate": 9.999493643694969e-06, "loss": 1.3, "step": 985 }, { "epoch": 0.272, "grad_norm": 4.39017391204834, "learning_rate": 9.999480577198679e-06, "loss": 1.4327, "step": 986 }, { "epoch": 0.27227586206896554, "grad_norm": 4.193832874298096, "learning_rate": 9.99946734426181e-06, "loss": 1.339, "step": 987 }, { "epoch": 0.27255172413793105, "grad_norm": 4.273287296295166, "learning_rate": 9.999453944884798e-06, "loss": 1.2939, "step": 988 }, { "epoch": 0.27282758620689657, "grad_norm": 3.8906302452087402, "learning_rate": 9.999440379068093e-06, "loss": 1.2627, "step": 989 }, { "epoch": 0.2731034482758621, "grad_norm": 4.159140110015869, "learning_rate": 9.999426646812145e-06, "loss": 1.3407, "step": 990 }, { "epoch": 0.2733793103448276, "grad_norm": 4.231621742248535, "learning_rate": 9.999412748117413e-06, "loss": 1.2528, "step": 991 }, { "epoch": 0.2736551724137931, "grad_norm": 4.424505233764648, "learning_rate": 9.999398682984355e-06, "loss": 1.2528, "step": 992 }, { "epoch": 0.27393103448275863, "grad_norm": 4.7296061515808105, "learning_rate": 9.999384451413447e-06, "loss": 1.2756, "step": 993 }, { "epoch": 0.27420689655172414, "grad_norm": 4.153752326965332, "learning_rate": 9.999370053405155e-06, "loss": 1.2327, "step": 994 }, { "epoch": 0.27448275862068966, "grad_norm": 3.9928126335144043, "learning_rate": 9.999355488959963e-06, "loss": 1.2664, "step": 995 }, { "epoch": 0.2747586206896552, "grad_norm": 4.149047374725342, "learning_rate": 9.999340758078354e-06, "loss": 1.2973, "step": 996 }, { "epoch": 0.2750344827586207, "grad_norm": 4.72184944152832, "learning_rate": 9.99932586076082e-06, "loss": 1.3819, "step": 997 }, { "epoch": 0.2753103448275862, "grad_norm": 4.43402099609375, "learning_rate": 9.999310797007856e-06, "loss": 1.3287, "step": 998 }, { "epoch": 0.2755862068965517, "grad_norm": 4.526209354400635, "learning_rate": 9.999295566819961e-06, "loss": 1.3468, "step": 999 }, { "epoch": 0.27586206896551724, "grad_norm": 4.2259440422058105, "learning_rate": 9.999280170197647e-06, "loss": 1.438, "step": 1000 }, { "epoch": 0.27586206896551724, "eval_loss": 1.351131558418274, "eval_runtime": 11.3635, "eval_samples_per_second": 35.2, "eval_steps_per_second": 4.4, "step": 1000 }, { "epoch": 0.27613793103448275, "grad_norm": 4.030689239501953, "learning_rate": 9.999264607141422e-06, "loss": 1.1585, "step": 1001 }, { "epoch": 0.27641379310344827, "grad_norm": 4.283797264099121, "learning_rate": 9.999248877651808e-06, "loss": 1.2706, "step": 1002 }, { "epoch": 0.2766896551724138, "grad_norm": 4.08389949798584, "learning_rate": 9.999232981729326e-06, "loss": 1.1872, "step": 1003 }, { "epoch": 0.2769655172413793, "grad_norm": 4.321232795715332, "learning_rate": 9.999216919374505e-06, "loss": 1.4819, "step": 1004 }, { "epoch": 0.2772413793103448, "grad_norm": 4.972838401794434, "learning_rate": 9.999200690587882e-06, "loss": 1.5122, "step": 1005 }, { "epoch": 0.27751724137931033, "grad_norm": 4.158543586730957, "learning_rate": 9.999184295369996e-06, "loss": 1.2589, "step": 1006 }, { "epoch": 0.27779310344827585, "grad_norm": 4.392009735107422, "learning_rate": 9.999167733721391e-06, "loss": 1.357, "step": 1007 }, { "epoch": 0.27806896551724136, "grad_norm": 4.499022483825684, "learning_rate": 9.999151005642624e-06, "loss": 1.2511, "step": 1008 }, { "epoch": 0.2783448275862069, "grad_norm": 3.8871471881866455, "learning_rate": 9.999134111134245e-06, "loss": 1.2552, "step": 1009 }, { "epoch": 0.2786206896551724, "grad_norm": 4.606494426727295, "learning_rate": 9.999117050196821e-06, "loss": 1.1963, "step": 1010 }, { "epoch": 0.2788965517241379, "grad_norm": 4.325713634490967, "learning_rate": 9.999099822830918e-06, "loss": 1.2285, "step": 1011 }, { "epoch": 0.2791724137931034, "grad_norm": 3.7106025218963623, "learning_rate": 9.99908242903711e-06, "loss": 1.1925, "step": 1012 }, { "epoch": 0.27944827586206894, "grad_norm": 4.095097064971924, "learning_rate": 9.999064868815975e-06, "loss": 1.1089, "step": 1013 }, { "epoch": 0.27972413793103446, "grad_norm": 4.309759140014648, "learning_rate": 9.999047142168101e-06, "loss": 1.4439, "step": 1014 }, { "epoch": 0.28, "grad_norm": 4.1290998458862305, "learning_rate": 9.999029249094075e-06, "loss": 1.2821, "step": 1015 }, { "epoch": 0.28027586206896554, "grad_norm": 4.614597320556641, "learning_rate": 9.999011189594494e-06, "loss": 1.358, "step": 1016 }, { "epoch": 0.28055172413793106, "grad_norm": 3.896455764770508, "learning_rate": 9.998992963669958e-06, "loss": 1.1279, "step": 1017 }, { "epoch": 0.2808275862068966, "grad_norm": 4.063017845153809, "learning_rate": 9.998974571321074e-06, "loss": 1.2542, "step": 1018 }, { "epoch": 0.2811034482758621, "grad_norm": 4.232154846191406, "learning_rate": 9.998956012548456e-06, "loss": 1.3622, "step": 1019 }, { "epoch": 0.2813793103448276, "grad_norm": 4.502597808837891, "learning_rate": 9.99893728735272e-06, "loss": 1.4391, "step": 1020 }, { "epoch": 0.2816551724137931, "grad_norm": 4.373916149139404, "learning_rate": 9.99891839573449e-06, "loss": 1.2958, "step": 1021 }, { "epoch": 0.28193103448275864, "grad_norm": 4.277380466461182, "learning_rate": 9.998899337694396e-06, "loss": 1.422, "step": 1022 }, { "epoch": 0.28220689655172415, "grad_norm": 3.970576524734497, "learning_rate": 9.99888011323307e-06, "loss": 1.2717, "step": 1023 }, { "epoch": 0.28248275862068967, "grad_norm": 3.5834202766418457, "learning_rate": 9.998860722351155e-06, "loss": 1.2966, "step": 1024 }, { "epoch": 0.2827586206896552, "grad_norm": 4.265092372894287, "learning_rate": 9.998841165049295e-06, "loss": 1.2445, "step": 1025 }, { "epoch": 0.2830344827586207, "grad_norm": 4.052958965301514, "learning_rate": 9.998821441328142e-06, "loss": 1.157, "step": 1026 }, { "epoch": 0.2833103448275862, "grad_norm": 4.806761264801025, "learning_rate": 9.998801551188351e-06, "loss": 1.3786, "step": 1027 }, { "epoch": 0.28358620689655173, "grad_norm": 4.234023571014404, "learning_rate": 9.998781494630585e-06, "loss": 1.2704, "step": 1028 }, { "epoch": 0.28386206896551724, "grad_norm": 4.428048133850098, "learning_rate": 9.998761271655511e-06, "loss": 1.3512, "step": 1029 }, { "epoch": 0.28413793103448276, "grad_norm": 4.213860511779785, "learning_rate": 9.998740882263803e-06, "loss": 1.2917, "step": 1030 }, { "epoch": 0.2844137931034483, "grad_norm": 3.9834654331207275, "learning_rate": 9.99872032645614e-06, "loss": 1.2706, "step": 1031 }, { "epoch": 0.2846896551724138, "grad_norm": 4.126083850860596, "learning_rate": 9.998699604233208e-06, "loss": 1.2059, "step": 1032 }, { "epoch": 0.2849655172413793, "grad_norm": 4.531032085418701, "learning_rate": 9.998678715595693e-06, "loss": 1.2345, "step": 1033 }, { "epoch": 0.2852413793103448, "grad_norm": 4.841335773468018, "learning_rate": 9.998657660544294e-06, "loss": 1.2852, "step": 1034 }, { "epoch": 0.28551724137931034, "grad_norm": 4.191484451293945, "learning_rate": 9.998636439079708e-06, "loss": 1.4062, "step": 1035 }, { "epoch": 0.28579310344827585, "grad_norm": 5.024242401123047, "learning_rate": 9.998615051202644e-06, "loss": 1.5658, "step": 1036 }, { "epoch": 0.28606896551724137, "grad_norm": 4.098049640655518, "learning_rate": 9.998593496913815e-06, "loss": 1.3553, "step": 1037 }, { "epoch": 0.2863448275862069, "grad_norm": 4.333290100097656, "learning_rate": 9.998571776213937e-06, "loss": 1.228, "step": 1038 }, { "epoch": 0.2866206896551724, "grad_norm": 3.9877026081085205, "learning_rate": 9.998549889103735e-06, "loss": 1.2233, "step": 1039 }, { "epoch": 0.2868965517241379, "grad_norm": 4.443782806396484, "learning_rate": 9.998527835583935e-06, "loss": 1.3495, "step": 1040 }, { "epoch": 0.28717241379310343, "grad_norm": 6.652185916900635, "learning_rate": 9.998505615655272e-06, "loss": 1.3287, "step": 1041 }, { "epoch": 0.28744827586206895, "grad_norm": 3.9183249473571777, "learning_rate": 9.998483229318486e-06, "loss": 1.3522, "step": 1042 }, { "epoch": 0.28772413793103446, "grad_norm": 4.264663219451904, "learning_rate": 9.998460676574324e-06, "loss": 1.2412, "step": 1043 }, { "epoch": 0.288, "grad_norm": 4.2035346031188965, "learning_rate": 9.998437957423534e-06, "loss": 1.331, "step": 1044 }, { "epoch": 0.2882758620689655, "grad_norm": 3.9842042922973633, "learning_rate": 9.998415071866873e-06, "loss": 1.2109, "step": 1045 }, { "epoch": 0.288551724137931, "grad_norm": 4.336766719818115, "learning_rate": 9.998392019905105e-06, "loss": 1.3201, "step": 1046 }, { "epoch": 0.2888275862068965, "grad_norm": 4.066944599151611, "learning_rate": 9.998368801538996e-06, "loss": 1.425, "step": 1047 }, { "epoch": 0.2891034482758621, "grad_norm": 4.713097095489502, "learning_rate": 9.998345416769318e-06, "loss": 1.579, "step": 1048 }, { "epoch": 0.2893793103448276, "grad_norm": 4.160200119018555, "learning_rate": 9.998321865596851e-06, "loss": 1.4209, "step": 1049 }, { "epoch": 0.2896551724137931, "grad_norm": 4.817358016967773, "learning_rate": 9.998298148022379e-06, "loss": 1.3791, "step": 1050 }, { "epoch": 0.28993103448275864, "grad_norm": 3.8432116508483887, "learning_rate": 9.99827426404669e-06, "loss": 1.2463, "step": 1051 }, { "epoch": 0.29020689655172416, "grad_norm": 3.850336790084839, "learning_rate": 9.99825021367058e-06, "loss": 1.3399, "step": 1052 }, { "epoch": 0.2904827586206897, "grad_norm": 4.635691165924072, "learning_rate": 9.99822599689485e-06, "loss": 1.4164, "step": 1053 }, { "epoch": 0.2907586206896552, "grad_norm": 4.423676013946533, "learning_rate": 9.998201613720307e-06, "loss": 1.1596, "step": 1054 }, { "epoch": 0.2910344827586207, "grad_norm": 4.377620697021484, "learning_rate": 9.99817706414776e-06, "loss": 1.3331, "step": 1055 }, { "epoch": 0.2913103448275862, "grad_norm": 4.424485683441162, "learning_rate": 9.998152348178032e-06, "loss": 1.4442, "step": 1056 }, { "epoch": 0.29158620689655174, "grad_norm": 4.097755432128906, "learning_rate": 9.99812746581194e-06, "loss": 1.2343, "step": 1057 }, { "epoch": 0.29186206896551725, "grad_norm": 4.623148441314697, "learning_rate": 9.998102417050313e-06, "loss": 1.35, "step": 1058 }, { "epoch": 0.29213793103448277, "grad_norm": 4.247848987579346, "learning_rate": 9.998077201893988e-06, "loss": 1.3691, "step": 1059 }, { "epoch": 0.2924137931034483, "grad_norm": 4.567172527313232, "learning_rate": 9.9980518203438e-06, "loss": 1.3094, "step": 1060 }, { "epoch": 0.2926896551724138, "grad_norm": 4.081173896789551, "learning_rate": 9.998026272400601e-06, "loss": 1.2364, "step": 1061 }, { "epoch": 0.2929655172413793, "grad_norm": 4.69353723526001, "learning_rate": 9.998000558065235e-06, "loss": 1.4484, "step": 1062 }, { "epoch": 0.29324137931034483, "grad_norm": 4.356832027435303, "learning_rate": 9.997974677338558e-06, "loss": 1.3357, "step": 1063 }, { "epoch": 0.29351724137931035, "grad_norm": 4.301070213317871, "learning_rate": 9.997948630221436e-06, "loss": 1.2462, "step": 1064 }, { "epoch": 0.29379310344827586, "grad_norm": 3.921330451965332, "learning_rate": 9.997922416714736e-06, "loss": 1.3075, "step": 1065 }, { "epoch": 0.2940689655172414, "grad_norm": 3.984105110168457, "learning_rate": 9.997896036819327e-06, "loss": 1.153, "step": 1066 }, { "epoch": 0.2943448275862069, "grad_norm": 4.259641170501709, "learning_rate": 9.99786949053609e-06, "loss": 1.1932, "step": 1067 }, { "epoch": 0.2946206896551724, "grad_norm": 3.947016477584839, "learning_rate": 9.997842777865905e-06, "loss": 1.1908, "step": 1068 }, { "epoch": 0.2948965517241379, "grad_norm": 4.579233646392822, "learning_rate": 9.997815898809666e-06, "loss": 1.2052, "step": 1069 }, { "epoch": 0.29517241379310344, "grad_norm": 3.9740469455718994, "learning_rate": 9.997788853368266e-06, "loss": 1.167, "step": 1070 }, { "epoch": 0.29544827586206895, "grad_norm": 4.367825508117676, "learning_rate": 9.997761641542605e-06, "loss": 1.3859, "step": 1071 }, { "epoch": 0.29572413793103447, "grad_norm": 4.281565189361572, "learning_rate": 9.997734263333589e-06, "loss": 1.2908, "step": 1072 }, { "epoch": 0.296, "grad_norm": 3.9761204719543457, "learning_rate": 9.99770671874213e-06, "loss": 1.2038, "step": 1073 }, { "epoch": 0.2962758620689655, "grad_norm": 4.125949859619141, "learning_rate": 9.997679007769145e-06, "loss": 1.3662, "step": 1074 }, { "epoch": 0.296551724137931, "grad_norm": 4.405930995941162, "learning_rate": 9.997651130415557e-06, "loss": 1.3108, "step": 1075 }, { "epoch": 0.29682758620689653, "grad_norm": 4.42073917388916, "learning_rate": 9.997623086682293e-06, "loss": 1.5396, "step": 1076 }, { "epoch": 0.29710344827586205, "grad_norm": 4.298886775970459, "learning_rate": 9.997594876570285e-06, "loss": 1.4421, "step": 1077 }, { "epoch": 0.29737931034482756, "grad_norm": 4.185904502868652, "learning_rate": 9.997566500080476e-06, "loss": 1.2692, "step": 1078 }, { "epoch": 0.2976551724137931, "grad_norm": 4.4684247970581055, "learning_rate": 9.997537957213807e-06, "loss": 1.3139, "step": 1079 }, { "epoch": 0.2979310344827586, "grad_norm": 4.47335958480835, "learning_rate": 9.997509247971231e-06, "loss": 1.3848, "step": 1080 }, { "epoch": 0.29820689655172417, "grad_norm": 4.31109619140625, "learning_rate": 9.997480372353704e-06, "loss": 1.36, "step": 1081 }, { "epoch": 0.2984827586206897, "grad_norm": 4.226624488830566, "learning_rate": 9.997451330362185e-06, "loss": 1.303, "step": 1082 }, { "epoch": 0.2987586206896552, "grad_norm": 4.31461763381958, "learning_rate": 9.997422121997642e-06, "loss": 1.2455, "step": 1083 }, { "epoch": 0.2990344827586207, "grad_norm": 4.714050769805908, "learning_rate": 9.997392747261048e-06, "loss": 1.2287, "step": 1084 }, { "epoch": 0.2993103448275862, "grad_norm": 4.251535415649414, "learning_rate": 9.99736320615338e-06, "loss": 1.2377, "step": 1085 }, { "epoch": 0.29958620689655174, "grad_norm": 4.251693248748779, "learning_rate": 9.997333498675622e-06, "loss": 1.1774, "step": 1086 }, { "epoch": 0.29986206896551726, "grad_norm": 4.25731086730957, "learning_rate": 9.997303624828761e-06, "loss": 1.348, "step": 1087 }, { "epoch": 0.3001379310344828, "grad_norm": 4.208011627197266, "learning_rate": 9.997273584613796e-06, "loss": 1.2688, "step": 1088 }, { "epoch": 0.3004137931034483, "grad_norm": 4.139581203460693, "learning_rate": 9.997243378031722e-06, "loss": 1.3963, "step": 1089 }, { "epoch": 0.3006896551724138, "grad_norm": 4.453812599182129, "learning_rate": 9.997213005083548e-06, "loss": 1.3419, "step": 1090 }, { "epoch": 0.3009655172413793, "grad_norm": 4.19468355178833, "learning_rate": 9.997182465770286e-06, "loss": 1.2597, "step": 1091 }, { "epoch": 0.30124137931034484, "grad_norm": 4.109433174133301, "learning_rate": 9.997151760092948e-06, "loss": 1.3343, "step": 1092 }, { "epoch": 0.30151724137931035, "grad_norm": 4.140635013580322, "learning_rate": 9.99712088805256e-06, "loss": 1.1793, "step": 1093 }, { "epoch": 0.30179310344827587, "grad_norm": 4.384700298309326, "learning_rate": 9.99708984965015e-06, "loss": 1.1634, "step": 1094 }, { "epoch": 0.3020689655172414, "grad_norm": 4.153924942016602, "learning_rate": 9.99705864488675e-06, "loss": 1.32, "step": 1095 }, { "epoch": 0.3023448275862069, "grad_norm": 4.378059387207031, "learning_rate": 9.997027273763398e-06, "loss": 1.3189, "step": 1096 }, { "epoch": 0.3026206896551724, "grad_norm": 4.152798175811768, "learning_rate": 9.99699573628114e-06, "loss": 1.2259, "step": 1097 }, { "epoch": 0.30289655172413793, "grad_norm": 3.996084690093994, "learning_rate": 9.996964032441026e-06, "loss": 1.2651, "step": 1098 }, { "epoch": 0.30317241379310345, "grad_norm": 4.507990837097168, "learning_rate": 9.996932162244111e-06, "loss": 1.2528, "step": 1099 }, { "epoch": 0.30344827586206896, "grad_norm": 4.487477779388428, "learning_rate": 9.996900125691454e-06, "loss": 1.3738, "step": 1100 }, { "epoch": 0.3037241379310345, "grad_norm": 4.536439418792725, "learning_rate": 9.996867922784126e-06, "loss": 1.3559, "step": 1101 }, { "epoch": 0.304, "grad_norm": 3.9295787811279297, "learning_rate": 9.996835553523195e-06, "loss": 1.1988, "step": 1102 }, { "epoch": 0.3042758620689655, "grad_norm": 4.08324670791626, "learning_rate": 9.99680301790974e-06, "loss": 1.3244, "step": 1103 }, { "epoch": 0.304551724137931, "grad_norm": 4.204971790313721, "learning_rate": 9.996770315944846e-06, "loss": 1.4416, "step": 1104 }, { "epoch": 0.30482758620689654, "grad_norm": 4.690728664398193, "learning_rate": 9.996737447629599e-06, "loss": 1.508, "step": 1105 }, { "epoch": 0.30510344827586205, "grad_norm": 4.3082709312438965, "learning_rate": 9.996704412965092e-06, "loss": 1.317, "step": 1106 }, { "epoch": 0.30537931034482757, "grad_norm": 4.619934558868408, "learning_rate": 9.99667121195243e-06, "loss": 1.3235, "step": 1107 }, { "epoch": 0.3056551724137931, "grad_norm": 4.350712776184082, "learning_rate": 9.996637844592714e-06, "loss": 1.3646, "step": 1108 }, { "epoch": 0.3059310344827586, "grad_norm": 4.023681640625, "learning_rate": 9.996604310887058e-06, "loss": 1.2266, "step": 1109 }, { "epoch": 0.3062068965517241, "grad_norm": 4.2075395584106445, "learning_rate": 9.996570610836576e-06, "loss": 1.3521, "step": 1110 }, { "epoch": 0.30648275862068963, "grad_norm": 4.202834606170654, "learning_rate": 9.996536744442389e-06, "loss": 1.3688, "step": 1111 }, { "epoch": 0.30675862068965515, "grad_norm": 3.7197353839874268, "learning_rate": 9.996502711705626e-06, "loss": 1.2443, "step": 1112 }, { "epoch": 0.30703448275862066, "grad_norm": 4.228206157684326, "learning_rate": 9.996468512627422e-06, "loss": 1.3153, "step": 1113 }, { "epoch": 0.3073103448275862, "grad_norm": 4.395427227020264, "learning_rate": 9.996434147208911e-06, "loss": 1.3664, "step": 1114 }, { "epoch": 0.30758620689655175, "grad_norm": 4.598774433135986, "learning_rate": 9.996399615451243e-06, "loss": 1.513, "step": 1115 }, { "epoch": 0.30786206896551727, "grad_norm": 4.2685933113098145, "learning_rate": 9.996364917355562e-06, "loss": 1.3572, "step": 1116 }, { "epoch": 0.3081379310344828, "grad_norm": 4.0451812744140625, "learning_rate": 9.996330052923025e-06, "loss": 1.2241, "step": 1117 }, { "epoch": 0.3084137931034483, "grad_norm": 4.415351867675781, "learning_rate": 9.996295022154793e-06, "loss": 1.4284, "step": 1118 }, { "epoch": 0.3086896551724138, "grad_norm": 4.439688205718994, "learning_rate": 9.996259825052033e-06, "loss": 1.2521, "step": 1119 }, { "epoch": 0.30896551724137933, "grad_norm": 4.448464393615723, "learning_rate": 9.996224461615916e-06, "loss": 1.3757, "step": 1120 }, { "epoch": 0.30924137931034484, "grad_norm": 4.262382984161377, "learning_rate": 9.99618893184762e-06, "loss": 1.3531, "step": 1121 }, { "epoch": 0.30951724137931036, "grad_norm": 4.112327575683594, "learning_rate": 9.996153235748325e-06, "loss": 1.3454, "step": 1122 }, { "epoch": 0.3097931034482759, "grad_norm": 4.374795436859131, "learning_rate": 9.996117373319225e-06, "loss": 1.5201, "step": 1123 }, { "epoch": 0.3100689655172414, "grad_norm": 4.072614669799805, "learning_rate": 9.996081344561507e-06, "loss": 1.1783, "step": 1124 }, { "epoch": 0.3103448275862069, "grad_norm": 4.461469650268555, "learning_rate": 9.996045149476377e-06, "loss": 1.4042, "step": 1125 }, { "epoch": 0.3106206896551724, "grad_norm": 3.966817617416382, "learning_rate": 9.996008788065035e-06, "loss": 1.2505, "step": 1126 }, { "epoch": 0.31089655172413794, "grad_norm": 4.16887903213501, "learning_rate": 9.995972260328694e-06, "loss": 1.4255, "step": 1127 }, { "epoch": 0.31117241379310345, "grad_norm": 3.904297113418579, "learning_rate": 9.995935566268569e-06, "loss": 1.2914, "step": 1128 }, { "epoch": 0.31144827586206897, "grad_norm": 4.533923149108887, "learning_rate": 9.995898705885882e-06, "loss": 1.4114, "step": 1129 }, { "epoch": 0.3117241379310345, "grad_norm": 4.256266117095947, "learning_rate": 9.995861679181861e-06, "loss": 1.2711, "step": 1130 }, { "epoch": 0.312, "grad_norm": 4.238870143890381, "learning_rate": 9.995824486157739e-06, "loss": 1.3017, "step": 1131 }, { "epoch": 0.3122758620689655, "grad_norm": 4.24057674407959, "learning_rate": 9.995787126814754e-06, "loss": 1.3053, "step": 1132 }, { "epoch": 0.31255172413793103, "grad_norm": 4.12558126449585, "learning_rate": 9.995749601154145e-06, "loss": 1.2663, "step": 1133 }, { "epoch": 0.31282758620689655, "grad_norm": 4.179388046264648, "learning_rate": 9.995711909177168e-06, "loss": 1.4416, "step": 1134 }, { "epoch": 0.31310344827586206, "grad_norm": 3.8865299224853516, "learning_rate": 9.995674050885074e-06, "loss": 1.3075, "step": 1135 }, { "epoch": 0.3133793103448276, "grad_norm": 3.990079402923584, "learning_rate": 9.995636026279126e-06, "loss": 1.1719, "step": 1136 }, { "epoch": 0.3136551724137931, "grad_norm": 4.260281085968018, "learning_rate": 9.995597835360586e-06, "loss": 1.2123, "step": 1137 }, { "epoch": 0.3139310344827586, "grad_norm": 4.199334621429443, "learning_rate": 9.99555947813073e-06, "loss": 1.3233, "step": 1138 }, { "epoch": 0.3142068965517241, "grad_norm": 4.561686038970947, "learning_rate": 9.995520954590832e-06, "loss": 1.284, "step": 1139 }, { "epoch": 0.31448275862068964, "grad_norm": 4.205078125, "learning_rate": 9.995482264742174e-06, "loss": 1.349, "step": 1140 }, { "epoch": 0.31475862068965516, "grad_norm": 3.8321523666381836, "learning_rate": 9.995443408586046e-06, "loss": 1.2427, "step": 1141 }, { "epoch": 0.31503448275862067, "grad_norm": 4.023752212524414, "learning_rate": 9.995404386123742e-06, "loss": 1.3968, "step": 1142 }, { "epoch": 0.3153103448275862, "grad_norm": 4.122349262237549, "learning_rate": 9.995365197356558e-06, "loss": 1.4105, "step": 1143 }, { "epoch": 0.3155862068965517, "grad_norm": 4.172778129577637, "learning_rate": 9.995325842285801e-06, "loss": 1.2208, "step": 1144 }, { "epoch": 0.3158620689655172, "grad_norm": 4.188494682312012, "learning_rate": 9.995286320912783e-06, "loss": 1.3412, "step": 1145 }, { "epoch": 0.31613793103448273, "grad_norm": 4.330320358276367, "learning_rate": 9.995246633238816e-06, "loss": 1.3262, "step": 1146 }, { "epoch": 0.31641379310344825, "grad_norm": 3.998673915863037, "learning_rate": 9.995206779265223e-06, "loss": 1.2947, "step": 1147 }, { "epoch": 0.3166896551724138, "grad_norm": 3.9600625038146973, "learning_rate": 9.99516675899333e-06, "loss": 1.3764, "step": 1148 }, { "epoch": 0.31696551724137934, "grad_norm": 3.8850667476654053, "learning_rate": 9.995126572424469e-06, "loss": 1.3182, "step": 1149 }, { "epoch": 0.31724137931034485, "grad_norm": 4.371672630310059, "learning_rate": 9.995086219559981e-06, "loss": 1.2145, "step": 1150 }, { "epoch": 0.31751724137931037, "grad_norm": 4.469352722167969, "learning_rate": 9.995045700401205e-06, "loss": 1.4464, "step": 1151 }, { "epoch": 0.3177931034482759, "grad_norm": 4.516106605529785, "learning_rate": 9.995005014949495e-06, "loss": 1.3096, "step": 1152 }, { "epoch": 0.3180689655172414, "grad_norm": 4.5428924560546875, "learning_rate": 9.994964163206201e-06, "loss": 1.4176, "step": 1153 }, { "epoch": 0.3183448275862069, "grad_norm": 3.623727321624756, "learning_rate": 9.994923145172687e-06, "loss": 1.1933, "step": 1154 }, { "epoch": 0.31862068965517243, "grad_norm": 4.178208827972412, "learning_rate": 9.994881960850312e-06, "loss": 1.5168, "step": 1155 }, { "epoch": 0.31889655172413794, "grad_norm": 4.35960578918457, "learning_rate": 9.994840610240454e-06, "loss": 1.4911, "step": 1156 }, { "epoch": 0.31917241379310346, "grad_norm": 4.64047908782959, "learning_rate": 9.994799093344486e-06, "loss": 1.2859, "step": 1157 }, { "epoch": 0.319448275862069, "grad_norm": 4.047033786773682, "learning_rate": 9.99475741016379e-06, "loss": 1.2148, "step": 1158 }, { "epoch": 0.3197241379310345, "grad_norm": 4.441187858581543, "learning_rate": 9.99471556069976e-06, "loss": 1.2558, "step": 1159 }, { "epoch": 0.32, "grad_norm": 4.258638858795166, "learning_rate": 9.994673544953779e-06, "loss": 1.333, "step": 1160 }, { "epoch": 0.3202758620689655, "grad_norm": 4.12920618057251, "learning_rate": 9.994631362927252e-06, "loss": 1.3001, "step": 1161 }, { "epoch": 0.32055172413793104, "grad_norm": 3.771010160446167, "learning_rate": 9.994589014621583e-06, "loss": 1.1891, "step": 1162 }, { "epoch": 0.32082758620689655, "grad_norm": 3.9610376358032227, "learning_rate": 9.99454650003818e-06, "loss": 1.2843, "step": 1163 }, { "epoch": 0.32110344827586207, "grad_norm": 4.2668232917785645, "learning_rate": 9.99450381917846e-06, "loss": 1.2289, "step": 1164 }, { "epoch": 0.3213793103448276, "grad_norm": 4.68328332901001, "learning_rate": 9.994460972043844e-06, "loss": 1.3181, "step": 1165 }, { "epoch": 0.3216551724137931, "grad_norm": 4.084867000579834, "learning_rate": 9.994417958635756e-06, "loss": 1.2878, "step": 1166 }, { "epoch": 0.3219310344827586, "grad_norm": 3.9352340698242188, "learning_rate": 9.994374778955631e-06, "loss": 1.4227, "step": 1167 }, { "epoch": 0.32220689655172413, "grad_norm": 4.480271339416504, "learning_rate": 9.994331433004904e-06, "loss": 1.3845, "step": 1168 }, { "epoch": 0.32248275862068965, "grad_norm": 3.9077131748199463, "learning_rate": 9.99428792078502e-06, "loss": 1.3119, "step": 1169 }, { "epoch": 0.32275862068965516, "grad_norm": 4.487433433532715, "learning_rate": 9.994244242297427e-06, "loss": 1.435, "step": 1170 }, { "epoch": 0.3230344827586207, "grad_norm": 3.856602191925049, "learning_rate": 9.994200397543579e-06, "loss": 1.2357, "step": 1171 }, { "epoch": 0.3233103448275862, "grad_norm": 4.177712440490723, "learning_rate": 9.994156386524935e-06, "loss": 1.2623, "step": 1172 }, { "epoch": 0.3235862068965517, "grad_norm": 3.9100165367126465, "learning_rate": 9.994112209242962e-06, "loss": 1.3912, "step": 1173 }, { "epoch": 0.3238620689655172, "grad_norm": 4.4008026123046875, "learning_rate": 9.994067865699129e-06, "loss": 1.5116, "step": 1174 }, { "epoch": 0.32413793103448274, "grad_norm": 4.345281600952148, "learning_rate": 9.994023355894914e-06, "loss": 1.2892, "step": 1175 }, { "epoch": 0.32441379310344826, "grad_norm": 4.078221797943115, "learning_rate": 9.993978679831795e-06, "loss": 1.3064, "step": 1176 }, { "epoch": 0.32468965517241377, "grad_norm": 4.0060200691223145, "learning_rate": 9.993933837511263e-06, "loss": 1.2468, "step": 1177 }, { "epoch": 0.3249655172413793, "grad_norm": 4.37706995010376, "learning_rate": 9.99388882893481e-06, "loss": 1.279, "step": 1178 }, { "epoch": 0.3252413793103448, "grad_norm": 4.81078577041626, "learning_rate": 9.993843654103936e-06, "loss": 1.4259, "step": 1179 }, { "epoch": 0.3255172413793103, "grad_norm": 4.596116542816162, "learning_rate": 9.993798313020142e-06, "loss": 1.3735, "step": 1180 }, { "epoch": 0.3257931034482759, "grad_norm": 3.804947853088379, "learning_rate": 9.993752805684937e-06, "loss": 1.2412, "step": 1181 }, { "epoch": 0.3260689655172414, "grad_norm": 4.740228652954102, "learning_rate": 9.99370713209984e-06, "loss": 1.2896, "step": 1182 }, { "epoch": 0.3263448275862069, "grad_norm": 4.64845609664917, "learning_rate": 9.993661292266368e-06, "loss": 1.2946, "step": 1183 }, { "epoch": 0.32662068965517244, "grad_norm": 3.991196393966675, "learning_rate": 9.99361528618605e-06, "loss": 1.2905, "step": 1184 }, { "epoch": 0.32689655172413795, "grad_norm": 4.123058319091797, "learning_rate": 9.993569113860412e-06, "loss": 1.2891, "step": 1185 }, { "epoch": 0.32717241379310347, "grad_norm": 4.945375442504883, "learning_rate": 9.993522775291e-06, "loss": 1.2926, "step": 1186 }, { "epoch": 0.327448275862069, "grad_norm": 3.836580514907837, "learning_rate": 9.99347627047935e-06, "loss": 1.2714, "step": 1187 }, { "epoch": 0.3277241379310345, "grad_norm": 4.439856052398682, "learning_rate": 9.99342959942701e-06, "loss": 1.2707, "step": 1188 }, { "epoch": 0.328, "grad_norm": 3.924170970916748, "learning_rate": 9.993382762135537e-06, "loss": 1.218, "step": 1189 }, { "epoch": 0.32827586206896553, "grad_norm": 4.018409729003906, "learning_rate": 9.993335758606489e-06, "loss": 1.2934, "step": 1190 }, { "epoch": 0.32855172413793104, "grad_norm": 3.846651077270508, "learning_rate": 9.993288588841432e-06, "loss": 1.1372, "step": 1191 }, { "epoch": 0.32882758620689656, "grad_norm": 4.202334880828857, "learning_rate": 9.993241252841933e-06, "loss": 1.4161, "step": 1192 }, { "epoch": 0.3291034482758621, "grad_norm": 4.451509475708008, "learning_rate": 9.993193750609571e-06, "loss": 1.2742, "step": 1193 }, { "epoch": 0.3293793103448276, "grad_norm": 4.188808441162109, "learning_rate": 9.993146082145928e-06, "loss": 1.3215, "step": 1194 }, { "epoch": 0.3296551724137931, "grad_norm": 3.84039568901062, "learning_rate": 9.993098247452589e-06, "loss": 1.0954, "step": 1195 }, { "epoch": 0.3299310344827586, "grad_norm": 3.9789891242980957, "learning_rate": 9.993050246531146e-06, "loss": 1.387, "step": 1196 }, { "epoch": 0.33020689655172414, "grad_norm": 4.276596546173096, "learning_rate": 9.993002079383199e-06, "loss": 1.3335, "step": 1197 }, { "epoch": 0.33048275862068965, "grad_norm": 4.396164894104004, "learning_rate": 9.992953746010349e-06, "loss": 1.165, "step": 1198 }, { "epoch": 0.33075862068965517, "grad_norm": 4.186651229858398, "learning_rate": 9.992905246414208e-06, "loss": 1.3237, "step": 1199 }, { "epoch": 0.3310344827586207, "grad_norm": 3.9383609294891357, "learning_rate": 9.99285658059639e-06, "loss": 1.2767, "step": 1200 }, { "epoch": 0.3313103448275862, "grad_norm": 3.984218120574951, "learning_rate": 9.99280774855851e-06, "loss": 1.2686, "step": 1201 }, { "epoch": 0.3315862068965517, "grad_norm": 4.147161960601807, "learning_rate": 9.992758750302204e-06, "loss": 1.2786, "step": 1202 }, { "epoch": 0.33186206896551723, "grad_norm": 4.048114776611328, "learning_rate": 9.992709585829092e-06, "loss": 1.2509, "step": 1203 }, { "epoch": 0.33213793103448275, "grad_norm": 4.205873489379883, "learning_rate": 9.99266025514082e-06, "loss": 1.4516, "step": 1204 }, { "epoch": 0.33241379310344826, "grad_norm": 4.028230667114258, "learning_rate": 9.992610758239026e-06, "loss": 1.3631, "step": 1205 }, { "epoch": 0.3326896551724138, "grad_norm": 4.200658321380615, "learning_rate": 9.992561095125357e-06, "loss": 1.2419, "step": 1206 }, { "epoch": 0.3329655172413793, "grad_norm": 4.047640323638916, "learning_rate": 9.992511265801468e-06, "loss": 1.3667, "step": 1207 }, { "epoch": 0.3332413793103448, "grad_norm": 4.043861389160156, "learning_rate": 9.992461270269018e-06, "loss": 1.4482, "step": 1208 }, { "epoch": 0.3335172413793103, "grad_norm": 3.812678813934326, "learning_rate": 9.992411108529669e-06, "loss": 1.204, "step": 1209 }, { "epoch": 0.33379310344827584, "grad_norm": 4.276828765869141, "learning_rate": 9.992360780585095e-06, "loss": 1.382, "step": 1210 }, { "epoch": 0.33406896551724136, "grad_norm": 4.22460412979126, "learning_rate": 9.99231028643697e-06, "loss": 1.3061, "step": 1211 }, { "epoch": 0.33434482758620687, "grad_norm": 4.406511306762695, "learning_rate": 9.992259626086972e-06, "loss": 1.2518, "step": 1212 }, { "epoch": 0.3346206896551724, "grad_norm": 4.190263271331787, "learning_rate": 9.992208799536792e-06, "loss": 1.2307, "step": 1213 }, { "epoch": 0.33489655172413796, "grad_norm": 4.470768451690674, "learning_rate": 9.992157806788118e-06, "loss": 1.3208, "step": 1214 }, { "epoch": 0.3351724137931035, "grad_norm": 4.0452070236206055, "learning_rate": 9.992106647842652e-06, "loss": 1.2968, "step": 1215 }, { "epoch": 0.335448275862069, "grad_norm": 4.396645545959473, "learning_rate": 9.992055322702095e-06, "loss": 1.3714, "step": 1216 }, { "epoch": 0.3357241379310345, "grad_norm": 3.9805092811584473, "learning_rate": 9.992003831368154e-06, "loss": 1.2925, "step": 1217 }, { "epoch": 0.336, "grad_norm": 4.008246421813965, "learning_rate": 9.991952173842546e-06, "loss": 1.1432, "step": 1218 }, { "epoch": 0.33627586206896554, "grad_norm": 4.09688663482666, "learning_rate": 9.991900350126988e-06, "loss": 1.4212, "step": 1219 }, { "epoch": 0.33655172413793105, "grad_norm": 4.151003360748291, "learning_rate": 9.991848360223207e-06, "loss": 1.3408, "step": 1220 }, { "epoch": 0.33682758620689657, "grad_norm": 4.003342151641846, "learning_rate": 9.991796204132933e-06, "loss": 1.2962, "step": 1221 }, { "epoch": 0.3371034482758621, "grad_norm": 4.079667091369629, "learning_rate": 9.991743881857904e-06, "loss": 1.3698, "step": 1222 }, { "epoch": 0.3373793103448276, "grad_norm": 4.031398296356201, "learning_rate": 9.991691393399863e-06, "loss": 1.2481, "step": 1223 }, { "epoch": 0.3376551724137931, "grad_norm": 4.228445529937744, "learning_rate": 9.991638738760554e-06, "loss": 1.2451, "step": 1224 }, { "epoch": 0.33793103448275863, "grad_norm": 4.024698734283447, "learning_rate": 9.991585917941731e-06, "loss": 1.2057, "step": 1225 }, { "epoch": 0.33820689655172415, "grad_norm": 3.892238140106201, "learning_rate": 9.991532930945152e-06, "loss": 1.294, "step": 1226 }, { "epoch": 0.33848275862068966, "grad_norm": 4.027159214019775, "learning_rate": 9.991479777772583e-06, "loss": 1.3626, "step": 1227 }, { "epoch": 0.3387586206896552, "grad_norm": 3.9991517066955566, "learning_rate": 9.991426458425793e-06, "loss": 1.2552, "step": 1228 }, { "epoch": 0.3390344827586207, "grad_norm": 3.6124727725982666, "learning_rate": 9.991372972906555e-06, "loss": 1.2675, "step": 1229 }, { "epoch": 0.3393103448275862, "grad_norm": 4.206104278564453, "learning_rate": 9.991319321216651e-06, "loss": 1.3245, "step": 1230 }, { "epoch": 0.3395862068965517, "grad_norm": 3.830157518386841, "learning_rate": 9.991265503357868e-06, "loss": 1.2086, "step": 1231 }, { "epoch": 0.33986206896551724, "grad_norm": 3.9042906761169434, "learning_rate": 9.991211519331996e-06, "loss": 1.33, "step": 1232 }, { "epoch": 0.34013793103448275, "grad_norm": 4.138256549835205, "learning_rate": 9.991157369140834e-06, "loss": 1.3302, "step": 1233 }, { "epoch": 0.34041379310344827, "grad_norm": 4.590444087982178, "learning_rate": 9.991103052786183e-06, "loss": 1.2209, "step": 1234 }, { "epoch": 0.3406896551724138, "grad_norm": 4.241447448730469, "learning_rate": 9.991048570269852e-06, "loss": 1.3814, "step": 1235 }, { "epoch": 0.3409655172413793, "grad_norm": 4.342823505401611, "learning_rate": 9.990993921593657e-06, "loss": 1.4239, "step": 1236 }, { "epoch": 0.3412413793103448, "grad_norm": 4.184075355529785, "learning_rate": 9.990939106759414e-06, "loss": 1.3744, "step": 1237 }, { "epoch": 0.34151724137931033, "grad_norm": 3.7961130142211914, "learning_rate": 9.990884125768949e-06, "loss": 1.2499, "step": 1238 }, { "epoch": 0.34179310344827585, "grad_norm": 4.104109287261963, "learning_rate": 9.990828978624092e-06, "loss": 1.3506, "step": 1239 }, { "epoch": 0.34206896551724136, "grad_norm": 4.267556667327881, "learning_rate": 9.990773665326681e-06, "loss": 1.3014, "step": 1240 }, { "epoch": 0.3423448275862069, "grad_norm": 3.970419406890869, "learning_rate": 9.990718185878555e-06, "loss": 1.2903, "step": 1241 }, { "epoch": 0.3426206896551724, "grad_norm": 4.119626998901367, "learning_rate": 9.990662540281561e-06, "loss": 1.299, "step": 1242 }, { "epoch": 0.3428965517241379, "grad_norm": 4.043395042419434, "learning_rate": 9.990606728537555e-06, "loss": 1.3938, "step": 1243 }, { "epoch": 0.3431724137931034, "grad_norm": 4.290748596191406, "learning_rate": 9.990550750648391e-06, "loss": 1.3759, "step": 1244 }, { "epoch": 0.34344827586206894, "grad_norm": 4.228410243988037, "learning_rate": 9.990494606615934e-06, "loss": 1.3863, "step": 1245 }, { "epoch": 0.34372413793103446, "grad_norm": 4.097527980804443, "learning_rate": 9.990438296442054e-06, "loss": 1.1973, "step": 1246 }, { "epoch": 0.344, "grad_norm": 3.9192657470703125, "learning_rate": 9.990381820128624e-06, "loss": 1.3986, "step": 1247 }, { "epoch": 0.34427586206896554, "grad_norm": 3.977185010910034, "learning_rate": 9.990325177677526e-06, "loss": 1.3085, "step": 1248 }, { "epoch": 0.34455172413793106, "grad_norm": 3.9206504821777344, "learning_rate": 9.990268369090643e-06, "loss": 1.328, "step": 1249 }, { "epoch": 0.3448275862068966, "grad_norm": 4.011115074157715, "learning_rate": 9.990211394369871e-06, "loss": 1.3469, "step": 1250 }, { "epoch": 0.3451034482758621, "grad_norm": 3.704336404800415, "learning_rate": 9.990154253517102e-06, "loss": 1.2178, "step": 1251 }, { "epoch": 0.3453793103448276, "grad_norm": 4.1151933670043945, "learning_rate": 9.99009694653424e-06, "loss": 1.3169, "step": 1252 }, { "epoch": 0.3456551724137931, "grad_norm": 4.422471523284912, "learning_rate": 9.990039473423195e-06, "loss": 1.3601, "step": 1253 }, { "epoch": 0.34593103448275864, "grad_norm": 3.7162394523620605, "learning_rate": 9.989981834185875e-06, "loss": 1.1687, "step": 1254 }, { "epoch": 0.34620689655172415, "grad_norm": 4.29964542388916, "learning_rate": 9.989924028824206e-06, "loss": 1.4115, "step": 1255 }, { "epoch": 0.34648275862068967, "grad_norm": 5.044336318969727, "learning_rate": 9.989866057340105e-06, "loss": 1.4072, "step": 1256 }, { "epoch": 0.3467586206896552, "grad_norm": 4.121173858642578, "learning_rate": 9.989807919735508e-06, "loss": 1.35, "step": 1257 }, { "epoch": 0.3470344827586207, "grad_norm": 4.320423126220703, "learning_rate": 9.989749616012347e-06, "loss": 1.3309, "step": 1258 }, { "epoch": 0.3473103448275862, "grad_norm": 4.2502827644348145, "learning_rate": 9.989691146172566e-06, "loss": 1.4142, "step": 1259 }, { "epoch": 0.34758620689655173, "grad_norm": 3.7928779125213623, "learning_rate": 9.989632510218108e-06, "loss": 1.2109, "step": 1260 }, { "epoch": 0.34786206896551725, "grad_norm": 3.712486743927002, "learning_rate": 9.989573708150928e-06, "loss": 1.2097, "step": 1261 }, { "epoch": 0.34813793103448276, "grad_norm": 4.261066436767578, "learning_rate": 9.98951473997298e-06, "loss": 1.4939, "step": 1262 }, { "epoch": 0.3484137931034483, "grad_norm": 5.915440082550049, "learning_rate": 9.989455605686232e-06, "loss": 1.219, "step": 1263 }, { "epoch": 0.3486896551724138, "grad_norm": 4.3720245361328125, "learning_rate": 9.989396305292648e-06, "loss": 1.3351, "step": 1264 }, { "epoch": 0.3489655172413793, "grad_norm": 4.097949504852295, "learning_rate": 9.989336838794208e-06, "loss": 1.3117, "step": 1265 }, { "epoch": 0.3492413793103448, "grad_norm": 4.444793224334717, "learning_rate": 9.989277206192886e-06, "loss": 1.2427, "step": 1266 }, { "epoch": 0.34951724137931034, "grad_norm": 4.215609073638916, "learning_rate": 9.98921740749067e-06, "loss": 1.3504, "step": 1267 }, { "epoch": 0.34979310344827585, "grad_norm": 3.7380847930908203, "learning_rate": 9.98915744268955e-06, "loss": 1.1751, "step": 1268 }, { "epoch": 0.35006896551724137, "grad_norm": 4.514756679534912, "learning_rate": 9.989097311791523e-06, "loss": 1.2755, "step": 1269 }, { "epoch": 0.3503448275862069, "grad_norm": 4.22389030456543, "learning_rate": 9.989037014798589e-06, "loss": 1.3867, "step": 1270 }, { "epoch": 0.3506206896551724, "grad_norm": 3.961782455444336, "learning_rate": 9.988976551712757e-06, "loss": 1.2757, "step": 1271 }, { "epoch": 0.3508965517241379, "grad_norm": 3.590972661972046, "learning_rate": 9.98891592253604e-06, "loss": 1.2083, "step": 1272 }, { "epoch": 0.35117241379310343, "grad_norm": 4.0264716148376465, "learning_rate": 9.988855127270458e-06, "loss": 1.2051, "step": 1273 }, { "epoch": 0.35144827586206895, "grad_norm": 4.17984676361084, "learning_rate": 9.98879416591803e-06, "loss": 1.3213, "step": 1274 }, { "epoch": 0.35172413793103446, "grad_norm": 4.04011344909668, "learning_rate": 9.988733038480792e-06, "loss": 1.1875, "step": 1275 }, { "epoch": 0.352, "grad_norm": 3.9925696849823, "learning_rate": 9.988671744960773e-06, "loss": 1.1485, "step": 1276 }, { "epoch": 0.3522758620689655, "grad_norm": 3.6962406635284424, "learning_rate": 9.988610285360018e-06, "loss": 1.298, "step": 1277 }, { "epoch": 0.352551724137931, "grad_norm": 4.245465278625488, "learning_rate": 9.98854865968057e-06, "loss": 1.4402, "step": 1278 }, { "epoch": 0.3528275862068965, "grad_norm": 4.101736068725586, "learning_rate": 9.988486867924482e-06, "loss": 1.4132, "step": 1279 }, { "epoch": 0.35310344827586204, "grad_norm": 4.209689617156982, "learning_rate": 9.988424910093811e-06, "loss": 1.3009, "step": 1280 }, { "epoch": 0.3533793103448276, "grad_norm": 4.079338073730469, "learning_rate": 9.98836278619062e-06, "loss": 1.4389, "step": 1281 }, { "epoch": 0.35365517241379313, "grad_norm": 4.423889636993408, "learning_rate": 9.988300496216978e-06, "loss": 1.4175, "step": 1282 }, { "epoch": 0.35393103448275864, "grad_norm": 4.39590311050415, "learning_rate": 9.988238040174955e-06, "loss": 1.3033, "step": 1283 }, { "epoch": 0.35420689655172416, "grad_norm": 3.7893073558807373, "learning_rate": 9.988175418066635e-06, "loss": 1.2235, "step": 1284 }, { "epoch": 0.3544827586206897, "grad_norm": 4.063144207000732, "learning_rate": 9.988112629894101e-06, "loss": 1.4194, "step": 1285 }, { "epoch": 0.3547586206896552, "grad_norm": 4.139680862426758, "learning_rate": 9.988049675659443e-06, "loss": 1.2963, "step": 1286 }, { "epoch": 0.3550344827586207, "grad_norm": 4.318148612976074, "learning_rate": 9.987986555364755e-06, "loss": 1.2999, "step": 1287 }, { "epoch": 0.3553103448275862, "grad_norm": 4.33256196975708, "learning_rate": 9.987923269012142e-06, "loss": 1.4804, "step": 1288 }, { "epoch": 0.35558620689655174, "grad_norm": 4.593536376953125, "learning_rate": 9.98785981660371e-06, "loss": 1.318, "step": 1289 }, { "epoch": 0.35586206896551725, "grad_norm": 3.7012088298797607, "learning_rate": 9.987796198141569e-06, "loss": 1.1952, "step": 1290 }, { "epoch": 0.35613793103448277, "grad_norm": 3.719561815261841, "learning_rate": 9.987732413627837e-06, "loss": 1.2201, "step": 1291 }, { "epoch": 0.3564137931034483, "grad_norm": 3.6683340072631836, "learning_rate": 9.987668463064642e-06, "loss": 1.2001, "step": 1292 }, { "epoch": 0.3566896551724138, "grad_norm": 4.066551208496094, "learning_rate": 9.987604346454107e-06, "loss": 1.2801, "step": 1293 }, { "epoch": 0.3569655172413793, "grad_norm": 3.633326768875122, "learning_rate": 9.987540063798372e-06, "loss": 1.185, "step": 1294 }, { "epoch": 0.35724137931034483, "grad_norm": 4.183876037597656, "learning_rate": 9.987475615099574e-06, "loss": 1.2089, "step": 1295 }, { "epoch": 0.35751724137931035, "grad_norm": 4.271857261657715, "learning_rate": 9.987411000359859e-06, "loss": 1.2185, "step": 1296 }, { "epoch": 0.35779310344827586, "grad_norm": 4.135492324829102, "learning_rate": 9.987346219581378e-06, "loss": 1.4453, "step": 1297 }, { "epoch": 0.3580689655172414, "grad_norm": 3.9216995239257812, "learning_rate": 9.98728127276629e-06, "loss": 1.163, "step": 1298 }, { "epoch": 0.3583448275862069, "grad_norm": 3.9936776161193848, "learning_rate": 9.987216159916753e-06, "loss": 1.2593, "step": 1299 }, { "epoch": 0.3586206896551724, "grad_norm": 4.187252044677734, "learning_rate": 9.987150881034937e-06, "loss": 1.3492, "step": 1300 }, { "epoch": 0.3588965517241379, "grad_norm": 3.764119863510132, "learning_rate": 9.987085436123016e-06, "loss": 1.2767, "step": 1301 }, { "epoch": 0.35917241379310344, "grad_norm": 3.8951492309570312, "learning_rate": 9.987019825183168e-06, "loss": 1.1097, "step": 1302 }, { "epoch": 0.35944827586206896, "grad_norm": 4.599253177642822, "learning_rate": 9.986954048217578e-06, "loss": 1.3221, "step": 1303 }, { "epoch": 0.35972413793103447, "grad_norm": 4.17017126083374, "learning_rate": 9.986888105228434e-06, "loss": 1.2931, "step": 1304 }, { "epoch": 0.36, "grad_norm": 4.298329830169678, "learning_rate": 9.986821996217933e-06, "loss": 1.2908, "step": 1305 }, { "epoch": 0.3602758620689655, "grad_norm": 4.302021503448486, "learning_rate": 9.986755721188274e-06, "loss": 1.3607, "step": 1306 }, { "epoch": 0.360551724137931, "grad_norm": 4.1804680824279785, "learning_rate": 9.986689280141665e-06, "loss": 1.4051, "step": 1307 }, { "epoch": 0.36082758620689653, "grad_norm": 3.852412223815918, "learning_rate": 9.986622673080317e-06, "loss": 1.3622, "step": 1308 }, { "epoch": 0.36110344827586205, "grad_norm": 4.305740833282471, "learning_rate": 9.98655590000645e-06, "loss": 1.2141, "step": 1309 }, { "epoch": 0.36137931034482756, "grad_norm": 4.412680625915527, "learning_rate": 9.986488960922284e-06, "loss": 1.3413, "step": 1310 }, { "epoch": 0.3616551724137931, "grad_norm": 4.020736217498779, "learning_rate": 9.986421855830049e-06, "loss": 1.2461, "step": 1311 }, { "epoch": 0.3619310344827586, "grad_norm": 3.799246072769165, "learning_rate": 9.986354584731977e-06, "loss": 1.3307, "step": 1312 }, { "epoch": 0.3622068965517241, "grad_norm": 4.133479595184326, "learning_rate": 9.986287147630312e-06, "loss": 1.1534, "step": 1313 }, { "epoch": 0.3624827586206897, "grad_norm": 4.391323566436768, "learning_rate": 9.986219544527293e-06, "loss": 1.4178, "step": 1314 }, { "epoch": 0.3627586206896552, "grad_norm": 3.62511944770813, "learning_rate": 9.986151775425175e-06, "loss": 1.2334, "step": 1315 }, { "epoch": 0.3630344827586207, "grad_norm": 3.9370179176330566, "learning_rate": 9.986083840326213e-06, "loss": 1.2643, "step": 1316 }, { "epoch": 0.36331034482758623, "grad_norm": 3.946362257003784, "learning_rate": 9.986015739232668e-06, "loss": 1.1981, "step": 1317 }, { "epoch": 0.36358620689655174, "grad_norm": 4.246631622314453, "learning_rate": 9.98594747214681e-06, "loss": 1.3734, "step": 1318 }, { "epoch": 0.36386206896551726, "grad_norm": 4.243740081787109, "learning_rate": 9.985879039070907e-06, "loss": 1.3405, "step": 1319 }, { "epoch": 0.3641379310344828, "grad_norm": 4.208371639251709, "learning_rate": 9.985810440007242e-06, "loss": 1.1398, "step": 1320 }, { "epoch": 0.3644137931034483, "grad_norm": 3.725706100463867, "learning_rate": 9.985741674958094e-06, "loss": 1.3331, "step": 1321 }, { "epoch": 0.3646896551724138, "grad_norm": 3.872864246368408, "learning_rate": 9.985672743925758e-06, "loss": 1.253, "step": 1322 }, { "epoch": 0.3649655172413793, "grad_norm": 4.082672119140625, "learning_rate": 9.985603646912523e-06, "loss": 1.4087, "step": 1323 }, { "epoch": 0.36524137931034484, "grad_norm": 3.8189713954925537, "learning_rate": 9.985534383920693e-06, "loss": 1.3001, "step": 1324 }, { "epoch": 0.36551724137931035, "grad_norm": 4.5136494636535645, "learning_rate": 9.985464954952572e-06, "loss": 1.3034, "step": 1325 }, { "epoch": 0.36579310344827587, "grad_norm": 4.411065578460693, "learning_rate": 9.985395360010476e-06, "loss": 1.3552, "step": 1326 }, { "epoch": 0.3660689655172414, "grad_norm": 4.153990745544434, "learning_rate": 9.985325599096713e-06, "loss": 1.4134, "step": 1327 }, { "epoch": 0.3663448275862069, "grad_norm": 4.311403751373291, "learning_rate": 9.985255672213613e-06, "loss": 1.3779, "step": 1328 }, { "epoch": 0.3666206896551724, "grad_norm": 4.017555236816406, "learning_rate": 9.985185579363502e-06, "loss": 1.2308, "step": 1329 }, { "epoch": 0.36689655172413793, "grad_norm": 4.808114528656006, "learning_rate": 9.985115320548712e-06, "loss": 1.3699, "step": 1330 }, { "epoch": 0.36717241379310345, "grad_norm": 3.754685401916504, "learning_rate": 9.985044895771584e-06, "loss": 1.2595, "step": 1331 }, { "epoch": 0.36744827586206896, "grad_norm": 3.9902451038360596, "learning_rate": 9.984974305034463e-06, "loss": 1.279, "step": 1332 }, { "epoch": 0.3677241379310345, "grad_norm": 3.935570240020752, "learning_rate": 9.984903548339697e-06, "loss": 1.3703, "step": 1333 }, { "epoch": 0.368, "grad_norm": 3.9247889518737793, "learning_rate": 9.98483262568964e-06, "loss": 1.2919, "step": 1334 }, { "epoch": 0.3682758620689655, "grad_norm": 3.7216269969940186, "learning_rate": 9.984761537086658e-06, "loss": 1.2914, "step": 1335 }, { "epoch": 0.368551724137931, "grad_norm": 4.786232948303223, "learning_rate": 9.984690282533115e-06, "loss": 1.4666, "step": 1336 }, { "epoch": 0.36882758620689654, "grad_norm": 4.53890323638916, "learning_rate": 9.984618862031384e-06, "loss": 1.2921, "step": 1337 }, { "epoch": 0.36910344827586206, "grad_norm": 4.0696330070495605, "learning_rate": 9.98454727558384e-06, "loss": 1.2937, "step": 1338 }, { "epoch": 0.36937931034482757, "grad_norm": 3.932687759399414, "learning_rate": 9.984475523192869e-06, "loss": 1.2369, "step": 1339 }, { "epoch": 0.3696551724137931, "grad_norm": 3.967050552368164, "learning_rate": 9.98440360486086e-06, "loss": 1.3212, "step": 1340 }, { "epoch": 0.3699310344827586, "grad_norm": 3.9215500354766846, "learning_rate": 9.984331520590205e-06, "loss": 1.4245, "step": 1341 }, { "epoch": 0.3702068965517241, "grad_norm": 4.085572242736816, "learning_rate": 9.984259270383306e-06, "loss": 1.3525, "step": 1342 }, { "epoch": 0.37048275862068963, "grad_norm": 3.7353909015655518, "learning_rate": 9.984186854242568e-06, "loss": 1.121, "step": 1343 }, { "epoch": 0.37075862068965515, "grad_norm": 3.755154848098755, "learning_rate": 9.9841142721704e-06, "loss": 1.1791, "step": 1344 }, { "epoch": 0.37103448275862067, "grad_norm": 3.9790494441986084, "learning_rate": 9.98404152416922e-06, "loss": 1.3917, "step": 1345 }, { "epoch": 0.3713103448275862, "grad_norm": 3.98948335647583, "learning_rate": 9.98396861024145e-06, "loss": 1.2811, "step": 1346 }, { "epoch": 0.37158620689655175, "grad_norm": 4.0430073738098145, "learning_rate": 9.983895530389516e-06, "loss": 1.3283, "step": 1347 }, { "epoch": 0.37186206896551727, "grad_norm": 3.8636081218719482, "learning_rate": 9.983822284615853e-06, "loss": 1.3144, "step": 1348 }, { "epoch": 0.3721379310344828, "grad_norm": 4.161283016204834, "learning_rate": 9.983748872922898e-06, "loss": 1.3129, "step": 1349 }, { "epoch": 0.3724137931034483, "grad_norm": 3.8795201778411865, "learning_rate": 9.983675295313096e-06, "loss": 1.3358, "step": 1350 }, { "epoch": 0.3726896551724138, "grad_norm": 3.988206386566162, "learning_rate": 9.983601551788895e-06, "loss": 1.2853, "step": 1351 }, { "epoch": 0.37296551724137933, "grad_norm": 3.9924962520599365, "learning_rate": 9.98352764235275e-06, "loss": 1.3146, "step": 1352 }, { "epoch": 0.37324137931034485, "grad_norm": 3.994016408920288, "learning_rate": 9.983453567007124e-06, "loss": 1.3669, "step": 1353 }, { "epoch": 0.37351724137931036, "grad_norm": 4.369632720947266, "learning_rate": 9.983379325754481e-06, "loss": 1.3209, "step": 1354 }, { "epoch": 0.3737931034482759, "grad_norm": 4.232319355010986, "learning_rate": 9.983304918597295e-06, "loss": 1.2455, "step": 1355 }, { "epoch": 0.3740689655172414, "grad_norm": 4.446051120758057, "learning_rate": 9.983230345538038e-06, "loss": 1.2524, "step": 1356 }, { "epoch": 0.3743448275862069, "grad_norm": 4.182830810546875, "learning_rate": 9.983155606579197e-06, "loss": 1.3073, "step": 1357 }, { "epoch": 0.3746206896551724, "grad_norm": 4.219051837921143, "learning_rate": 9.98308070172326e-06, "loss": 1.2314, "step": 1358 }, { "epoch": 0.37489655172413794, "grad_norm": 4.359668731689453, "learning_rate": 9.98300563097272e-06, "loss": 1.3432, "step": 1359 }, { "epoch": 0.37517241379310345, "grad_norm": 3.916405439376831, "learning_rate": 9.982930394330074e-06, "loss": 1.2502, "step": 1360 }, { "epoch": 0.37544827586206897, "grad_norm": 4.165340423583984, "learning_rate": 9.982854991797832e-06, "loss": 1.3205, "step": 1361 }, { "epoch": 0.3757241379310345, "grad_norm": 3.836862802505493, "learning_rate": 9.982779423378498e-06, "loss": 1.3729, "step": 1362 }, { "epoch": 0.376, "grad_norm": 4.130124568939209, "learning_rate": 9.982703689074593e-06, "loss": 1.3425, "step": 1363 }, { "epoch": 0.3762758620689655, "grad_norm": 4.356052398681641, "learning_rate": 9.982627788888635e-06, "loss": 1.2629, "step": 1364 }, { "epoch": 0.37655172413793103, "grad_norm": 4.048943519592285, "learning_rate": 9.982551722823152e-06, "loss": 1.3692, "step": 1365 }, { "epoch": 0.37682758620689655, "grad_norm": 3.99225115776062, "learning_rate": 9.982475490880678e-06, "loss": 1.2689, "step": 1366 }, { "epoch": 0.37710344827586206, "grad_norm": 4.137761116027832, "learning_rate": 9.982399093063749e-06, "loss": 1.3437, "step": 1367 }, { "epoch": 0.3773793103448276, "grad_norm": 3.9684576988220215, "learning_rate": 9.982322529374907e-06, "loss": 1.2467, "step": 1368 }, { "epoch": 0.3776551724137931, "grad_norm": 4.398645877838135, "learning_rate": 9.982245799816704e-06, "loss": 1.4077, "step": 1369 }, { "epoch": 0.3779310344827586, "grad_norm": 3.819809913635254, "learning_rate": 9.98216890439169e-06, "loss": 1.3479, "step": 1370 }, { "epoch": 0.3782068965517241, "grad_norm": 4.2175211906433105, "learning_rate": 9.982091843102432e-06, "loss": 1.3705, "step": 1371 }, { "epoch": 0.37848275862068964, "grad_norm": 4.545927047729492, "learning_rate": 9.98201461595149e-06, "loss": 1.2797, "step": 1372 }, { "epoch": 0.37875862068965516, "grad_norm": 3.6758949756622314, "learning_rate": 9.981937222941436e-06, "loss": 1.2735, "step": 1373 }, { "epoch": 0.37903448275862067, "grad_norm": 3.6358532905578613, "learning_rate": 9.981859664074848e-06, "loss": 1.174, "step": 1374 }, { "epoch": 0.3793103448275862, "grad_norm": 4.06286096572876, "learning_rate": 9.981781939354307e-06, "loss": 1.2669, "step": 1375 }, { "epoch": 0.3795862068965517, "grad_norm": 4.250113010406494, "learning_rate": 9.9817040487824e-06, "loss": 1.354, "step": 1376 }, { "epoch": 0.3798620689655172, "grad_norm": 4.39481782913208, "learning_rate": 9.981625992361722e-06, "loss": 1.3594, "step": 1377 }, { "epoch": 0.38013793103448273, "grad_norm": 3.762463092803955, "learning_rate": 9.981547770094869e-06, "loss": 1.2417, "step": 1378 }, { "epoch": 0.38041379310344825, "grad_norm": 3.8284382820129395, "learning_rate": 9.981469381984447e-06, "loss": 1.2591, "step": 1379 }, { "epoch": 0.38068965517241377, "grad_norm": 3.8476340770721436, "learning_rate": 9.981390828033065e-06, "loss": 1.285, "step": 1380 }, { "epoch": 0.38096551724137934, "grad_norm": 4.030149936676025, "learning_rate": 9.981312108243338e-06, "loss": 1.2774, "step": 1381 }, { "epoch": 0.38124137931034485, "grad_norm": 3.729780912399292, "learning_rate": 9.981233222617888e-06, "loss": 1.1485, "step": 1382 }, { "epoch": 0.38151724137931037, "grad_norm": 4.038679599761963, "learning_rate": 9.981154171159339e-06, "loss": 1.2897, "step": 1383 }, { "epoch": 0.3817931034482759, "grad_norm": 4.487790107727051, "learning_rate": 9.981074953870326e-06, "loss": 1.3825, "step": 1384 }, { "epoch": 0.3820689655172414, "grad_norm": 4.192556858062744, "learning_rate": 9.980995570753483e-06, "loss": 1.3014, "step": 1385 }, { "epoch": 0.3823448275862069, "grad_norm": 4.066813945770264, "learning_rate": 9.980916021811456e-06, "loss": 1.2149, "step": 1386 }, { "epoch": 0.38262068965517243, "grad_norm": 4.231672286987305, "learning_rate": 9.980836307046888e-06, "loss": 1.3842, "step": 1387 }, { "epoch": 0.38289655172413795, "grad_norm": 4.344812870025635, "learning_rate": 9.98075642646244e-06, "loss": 1.3853, "step": 1388 }, { "epoch": 0.38317241379310346, "grad_norm": 4.035260200500488, "learning_rate": 9.980676380060765e-06, "loss": 1.2, "step": 1389 }, { "epoch": 0.383448275862069, "grad_norm": 4.2752790451049805, "learning_rate": 9.980596167844532e-06, "loss": 1.2689, "step": 1390 }, { "epoch": 0.3837241379310345, "grad_norm": 3.776810646057129, "learning_rate": 9.98051578981641e-06, "loss": 1.2947, "step": 1391 }, { "epoch": 0.384, "grad_norm": 4.458671569824219, "learning_rate": 9.980435245979072e-06, "loss": 1.2454, "step": 1392 }, { "epoch": 0.3842758620689655, "grad_norm": 3.8749196529388428, "learning_rate": 9.980354536335205e-06, "loss": 1.3029, "step": 1393 }, { "epoch": 0.38455172413793104, "grad_norm": 3.910645008087158, "learning_rate": 9.980273660887492e-06, "loss": 1.3519, "step": 1394 }, { "epoch": 0.38482758620689655, "grad_norm": 3.8139920234680176, "learning_rate": 9.980192619638627e-06, "loss": 1.2565, "step": 1395 }, { "epoch": 0.38510344827586207, "grad_norm": 3.7529823780059814, "learning_rate": 9.980111412591308e-06, "loss": 1.3018, "step": 1396 }, { "epoch": 0.3853793103448276, "grad_norm": 4.074460983276367, "learning_rate": 9.980030039748239e-06, "loss": 1.2825, "step": 1397 }, { "epoch": 0.3856551724137931, "grad_norm": 3.8675668239593506, "learning_rate": 9.979948501112127e-06, "loss": 1.306, "step": 1398 }, { "epoch": 0.3859310344827586, "grad_norm": 4.110939979553223, "learning_rate": 9.979866796685686e-06, "loss": 1.358, "step": 1399 }, { "epoch": 0.38620689655172413, "grad_norm": 3.917438268661499, "learning_rate": 9.979784926471641e-06, "loss": 1.2854, "step": 1400 }, { "epoch": 0.38648275862068965, "grad_norm": 4.091922760009766, "learning_rate": 9.979702890472713e-06, "loss": 1.1711, "step": 1401 }, { "epoch": 0.38675862068965516, "grad_norm": 4.597204685211182, "learning_rate": 9.979620688691634e-06, "loss": 1.4387, "step": 1402 }, { "epoch": 0.3870344827586207, "grad_norm": 4.122961521148682, "learning_rate": 9.979538321131141e-06, "loss": 1.2379, "step": 1403 }, { "epoch": 0.3873103448275862, "grad_norm": 4.42548942565918, "learning_rate": 9.979455787793977e-06, "loss": 1.3794, "step": 1404 }, { "epoch": 0.3875862068965517, "grad_norm": 3.692553997039795, "learning_rate": 9.979373088682888e-06, "loss": 1.1729, "step": 1405 }, { "epoch": 0.3878620689655172, "grad_norm": 3.7884891033172607, "learning_rate": 9.97929022380063e-06, "loss": 1.2253, "step": 1406 }, { "epoch": 0.38813793103448274, "grad_norm": 4.209920406341553, "learning_rate": 9.979207193149958e-06, "loss": 1.321, "step": 1407 }, { "epoch": 0.38841379310344826, "grad_norm": 3.8458924293518066, "learning_rate": 9.979123996733639e-06, "loss": 1.2062, "step": 1408 }, { "epoch": 0.3886896551724138, "grad_norm": 4.065197944641113, "learning_rate": 9.979040634554443e-06, "loss": 1.3744, "step": 1409 }, { "epoch": 0.3889655172413793, "grad_norm": 4.294150352478027, "learning_rate": 9.978957106615142e-06, "loss": 1.1571, "step": 1410 }, { "epoch": 0.3892413793103448, "grad_norm": 3.8797895908355713, "learning_rate": 9.978873412918519e-06, "loss": 1.2116, "step": 1411 }, { "epoch": 0.3895172413793103, "grad_norm": 4.143162727355957, "learning_rate": 9.97878955346736e-06, "loss": 1.4784, "step": 1412 }, { "epoch": 0.38979310344827584, "grad_norm": 4.079013824462891, "learning_rate": 9.978705528264458e-06, "loss": 1.2039, "step": 1413 }, { "epoch": 0.3900689655172414, "grad_norm": 3.8577237129211426, "learning_rate": 9.978621337312608e-06, "loss": 1.2033, "step": 1414 }, { "epoch": 0.3903448275862069, "grad_norm": 3.857748508453369, "learning_rate": 9.978536980614615e-06, "loss": 1.2273, "step": 1415 }, { "epoch": 0.39062068965517244, "grad_norm": 4.250051498413086, "learning_rate": 9.978452458173287e-06, "loss": 1.2555, "step": 1416 }, { "epoch": 0.39089655172413795, "grad_norm": 4.457489967346191, "learning_rate": 9.978367769991435e-06, "loss": 1.2172, "step": 1417 }, { "epoch": 0.39117241379310347, "grad_norm": 4.041342258453369, "learning_rate": 9.978282916071882e-06, "loss": 1.4171, "step": 1418 }, { "epoch": 0.391448275862069, "grad_norm": 3.5735230445861816, "learning_rate": 9.978197896417452e-06, "loss": 1.3061, "step": 1419 }, { "epoch": 0.3917241379310345, "grad_norm": 3.7893664836883545, "learning_rate": 9.978112711030975e-06, "loss": 1.2634, "step": 1420 }, { "epoch": 0.392, "grad_norm": 3.7414979934692383, "learning_rate": 9.978027359915288e-06, "loss": 1.2839, "step": 1421 }, { "epoch": 0.39227586206896553, "grad_norm": 4.3467864990234375, "learning_rate": 9.977941843073231e-06, "loss": 1.5294, "step": 1422 }, { "epoch": 0.39255172413793105, "grad_norm": 4.275151252746582, "learning_rate": 9.97785616050765e-06, "loss": 1.4272, "step": 1423 }, { "epoch": 0.39282758620689656, "grad_norm": 4.022021770477295, "learning_rate": 9.9777703122214e-06, "loss": 1.2258, "step": 1424 }, { "epoch": 0.3931034482758621, "grad_norm": 3.641977310180664, "learning_rate": 9.97768429821734e-06, "loss": 1.2279, "step": 1425 }, { "epoch": 0.3933793103448276, "grad_norm": 3.8605620861053467, "learning_rate": 9.977598118498329e-06, "loss": 1.2774, "step": 1426 }, { "epoch": 0.3936551724137931, "grad_norm": 3.8923420906066895, "learning_rate": 9.97751177306724e-06, "loss": 1.3666, "step": 1427 }, { "epoch": 0.3939310344827586, "grad_norm": 4.238389492034912, "learning_rate": 9.977425261926945e-06, "loss": 1.2943, "step": 1428 }, { "epoch": 0.39420689655172414, "grad_norm": 4.150771617889404, "learning_rate": 9.977338585080327e-06, "loss": 1.1481, "step": 1429 }, { "epoch": 0.39448275862068966, "grad_norm": 4.366581916809082, "learning_rate": 9.977251742530269e-06, "loss": 1.2508, "step": 1430 }, { "epoch": 0.39475862068965517, "grad_norm": 4.1006178855896, "learning_rate": 9.977164734279663e-06, "loss": 1.1871, "step": 1431 }, { "epoch": 0.3950344827586207, "grad_norm": 4.322929382324219, "learning_rate": 9.977077560331406e-06, "loss": 1.2938, "step": 1432 }, { "epoch": 0.3953103448275862, "grad_norm": 4.276115417480469, "learning_rate": 9.976990220688399e-06, "loss": 1.4908, "step": 1433 }, { "epoch": 0.3955862068965517, "grad_norm": 4.153079986572266, "learning_rate": 9.97690271535355e-06, "loss": 1.1997, "step": 1434 }, { "epoch": 0.39586206896551723, "grad_norm": 3.6206753253936768, "learning_rate": 9.976815044329774e-06, "loss": 1.2713, "step": 1435 }, { "epoch": 0.39613793103448275, "grad_norm": 4.29370641708374, "learning_rate": 9.976727207619989e-06, "loss": 1.3353, "step": 1436 }, { "epoch": 0.39641379310344826, "grad_norm": 4.1537275314331055, "learning_rate": 9.976639205227118e-06, "loss": 1.3166, "step": 1437 }, { "epoch": 0.3966896551724138, "grad_norm": 4.185117721557617, "learning_rate": 9.976551037154092e-06, "loss": 1.327, "step": 1438 }, { "epoch": 0.3969655172413793, "grad_norm": 4.202447891235352, "learning_rate": 9.976462703403847e-06, "loss": 1.2481, "step": 1439 }, { "epoch": 0.3972413793103448, "grad_norm": 3.958693265914917, "learning_rate": 9.97637420397932e-06, "loss": 1.2744, "step": 1440 }, { "epoch": 0.3975172413793103, "grad_norm": 3.8135509490966797, "learning_rate": 9.976285538883459e-06, "loss": 1.2444, "step": 1441 }, { "epoch": 0.39779310344827584, "grad_norm": 4.061922550201416, "learning_rate": 9.976196708119219e-06, "loss": 1.4226, "step": 1442 }, { "epoch": 0.39806896551724136, "grad_norm": 3.8475513458251953, "learning_rate": 9.976107711689554e-06, "loss": 1.183, "step": 1443 }, { "epoch": 0.3983448275862069, "grad_norm": 4.238942623138428, "learning_rate": 9.976018549597426e-06, "loss": 1.6259, "step": 1444 }, { "epoch": 0.3986206896551724, "grad_norm": 4.554747581481934, "learning_rate": 9.975929221845806e-06, "loss": 1.1647, "step": 1445 }, { "epoch": 0.3988965517241379, "grad_norm": 3.7313058376312256, "learning_rate": 9.975839728437667e-06, "loss": 1.2419, "step": 1446 }, { "epoch": 0.3991724137931035, "grad_norm": 3.959758758544922, "learning_rate": 9.97575006937599e-06, "loss": 1.3689, "step": 1447 }, { "epoch": 0.399448275862069, "grad_norm": 3.8447041511535645, "learning_rate": 9.975660244663755e-06, "loss": 1.2881, "step": 1448 }, { "epoch": 0.3997241379310345, "grad_norm": 4.204307556152344, "learning_rate": 9.975570254303957e-06, "loss": 1.3477, "step": 1449 }, { "epoch": 0.4, "grad_norm": 4.1267876625061035, "learning_rate": 9.97548009829959e-06, "loss": 1.3258, "step": 1450 }, { "epoch": 0.40027586206896554, "grad_norm": 3.916475296020508, "learning_rate": 9.975389776653657e-06, "loss": 1.204, "step": 1451 }, { "epoch": 0.40055172413793105, "grad_norm": 3.9052250385284424, "learning_rate": 9.975299289369164e-06, "loss": 1.3416, "step": 1452 }, { "epoch": 0.40082758620689657, "grad_norm": 4.1196112632751465, "learning_rate": 9.975208636449122e-06, "loss": 1.205, "step": 1453 }, { "epoch": 0.4011034482758621, "grad_norm": 4.310620307922363, "learning_rate": 9.97511781789655e-06, "loss": 1.3701, "step": 1454 }, { "epoch": 0.4013793103448276, "grad_norm": 3.7118029594421387, "learning_rate": 9.975026833714474e-06, "loss": 1.3745, "step": 1455 }, { "epoch": 0.4016551724137931, "grad_norm": 4.059000492095947, "learning_rate": 9.974935683905919e-06, "loss": 1.1952, "step": 1456 }, { "epoch": 0.40193103448275863, "grad_norm": 4.652474880218506, "learning_rate": 9.974844368473921e-06, "loss": 1.2614, "step": 1457 }, { "epoch": 0.40220689655172415, "grad_norm": 4.074175834655762, "learning_rate": 9.974752887421521e-06, "loss": 1.3006, "step": 1458 }, { "epoch": 0.40248275862068966, "grad_norm": 4.066506862640381, "learning_rate": 9.974661240751764e-06, "loss": 1.3128, "step": 1459 }, { "epoch": 0.4027586206896552, "grad_norm": 4.252899169921875, "learning_rate": 9.974569428467701e-06, "loss": 1.381, "step": 1460 }, { "epoch": 0.4030344827586207, "grad_norm": 3.8838109970092773, "learning_rate": 9.974477450572388e-06, "loss": 1.3803, "step": 1461 }, { "epoch": 0.4033103448275862, "grad_norm": 4.6104230880737305, "learning_rate": 9.974385307068887e-06, "loss": 1.2994, "step": 1462 }, { "epoch": 0.4035862068965517, "grad_norm": 3.935293197631836, "learning_rate": 9.974292997960268e-06, "loss": 1.233, "step": 1463 }, { "epoch": 0.40386206896551724, "grad_norm": 3.8005905151367188, "learning_rate": 9.974200523249602e-06, "loss": 1.1824, "step": 1464 }, { "epoch": 0.40413793103448276, "grad_norm": 4.089283466339111, "learning_rate": 9.974107882939967e-06, "loss": 1.2548, "step": 1465 }, { "epoch": 0.40441379310344827, "grad_norm": 4.139223575592041, "learning_rate": 9.974015077034449e-06, "loss": 1.3182, "step": 1466 }, { "epoch": 0.4046896551724138, "grad_norm": 4.554809093475342, "learning_rate": 9.973922105536136e-06, "loss": 1.5467, "step": 1467 }, { "epoch": 0.4049655172413793, "grad_norm": 3.7283763885498047, "learning_rate": 9.973828968448125e-06, "loss": 1.0726, "step": 1468 }, { "epoch": 0.4052413793103448, "grad_norm": 3.7161777019500732, "learning_rate": 9.973735665773514e-06, "loss": 1.2767, "step": 1469 }, { "epoch": 0.40551724137931033, "grad_norm": 4.227314472198486, "learning_rate": 9.973642197515412e-06, "loss": 1.3106, "step": 1470 }, { "epoch": 0.40579310344827585, "grad_norm": 4.0421833992004395, "learning_rate": 9.97354856367693e-06, "loss": 1.3004, "step": 1471 }, { "epoch": 0.40606896551724136, "grad_norm": 3.882158041000366, "learning_rate": 9.973454764261184e-06, "loss": 1.2419, "step": 1472 }, { "epoch": 0.4063448275862069, "grad_norm": 3.723365306854248, "learning_rate": 9.973360799271296e-06, "loss": 1.1984, "step": 1473 }, { "epoch": 0.4066206896551724, "grad_norm": 3.9948418140411377, "learning_rate": 9.973266668710398e-06, "loss": 1.2404, "step": 1474 }, { "epoch": 0.4068965517241379, "grad_norm": 4.0237555503845215, "learning_rate": 9.973172372581621e-06, "loss": 1.2255, "step": 1475 }, { "epoch": 0.4071724137931034, "grad_norm": 4.7054877281188965, "learning_rate": 9.973077910888103e-06, "loss": 1.1498, "step": 1476 }, { "epoch": 0.40744827586206894, "grad_norm": 3.5169012546539307, "learning_rate": 9.972983283632992e-06, "loss": 1.1855, "step": 1477 }, { "epoch": 0.40772413793103446, "grad_norm": 3.6742196083068848, "learning_rate": 9.972888490819436e-06, "loss": 1.289, "step": 1478 }, { "epoch": 0.408, "grad_norm": 4.047756671905518, "learning_rate": 9.972793532450594e-06, "loss": 1.2624, "step": 1479 }, { "epoch": 0.40827586206896554, "grad_norm": 4.087266445159912, "learning_rate": 9.972698408529621e-06, "loss": 1.2038, "step": 1480 }, { "epoch": 0.40855172413793106, "grad_norm": 4.014871120452881, "learning_rate": 9.97260311905969e-06, "loss": 1.2371, "step": 1481 }, { "epoch": 0.4088275862068966, "grad_norm": 4.226722240447998, "learning_rate": 9.972507664043972e-06, "loss": 1.4027, "step": 1482 }, { "epoch": 0.4091034482758621, "grad_norm": 4.182595729827881, "learning_rate": 9.972412043485642e-06, "loss": 1.3967, "step": 1483 }, { "epoch": 0.4093793103448276, "grad_norm": 4.008903503417969, "learning_rate": 9.972316257387886e-06, "loss": 1.2316, "step": 1484 }, { "epoch": 0.4096551724137931, "grad_norm": 3.811257839202881, "learning_rate": 9.972220305753892e-06, "loss": 1.2827, "step": 1485 }, { "epoch": 0.40993103448275864, "grad_norm": 3.7607204914093018, "learning_rate": 9.972124188586854e-06, "loss": 1.314, "step": 1486 }, { "epoch": 0.41020689655172415, "grad_norm": 4.118701457977295, "learning_rate": 9.972027905889974e-06, "loss": 1.2199, "step": 1487 }, { "epoch": 0.41048275862068967, "grad_norm": 4.309303283691406, "learning_rate": 9.971931457666453e-06, "loss": 1.2077, "step": 1488 }, { "epoch": 0.4107586206896552, "grad_norm": 4.182068347930908, "learning_rate": 9.971834843919508e-06, "loss": 1.3385, "step": 1489 }, { "epoch": 0.4110344827586207, "grad_norm": 4.041060447692871, "learning_rate": 9.97173806465235e-06, "loss": 1.4102, "step": 1490 }, { "epoch": 0.4113103448275862, "grad_norm": 3.701934576034546, "learning_rate": 9.971641119868205e-06, "loss": 1.2756, "step": 1491 }, { "epoch": 0.41158620689655173, "grad_norm": 3.9022529125213623, "learning_rate": 9.971544009570296e-06, "loss": 1.2493, "step": 1492 }, { "epoch": 0.41186206896551725, "grad_norm": 3.9894840717315674, "learning_rate": 9.97144673376186e-06, "loss": 1.28, "step": 1493 }, { "epoch": 0.41213793103448276, "grad_norm": 4.262202739715576, "learning_rate": 9.971349292446132e-06, "loss": 1.3802, "step": 1494 }, { "epoch": 0.4124137931034483, "grad_norm": 3.715012788772583, "learning_rate": 9.971251685626362e-06, "loss": 1.2904, "step": 1495 }, { "epoch": 0.4126896551724138, "grad_norm": 4.402946949005127, "learning_rate": 9.971153913305793e-06, "loss": 1.3135, "step": 1496 }, { "epoch": 0.4129655172413793, "grad_norm": 4.395558834075928, "learning_rate": 9.971055975487683e-06, "loss": 1.2416, "step": 1497 }, { "epoch": 0.4132413793103448, "grad_norm": 4.223690032958984, "learning_rate": 9.970957872175291e-06, "loss": 1.1674, "step": 1498 }, { "epoch": 0.41351724137931034, "grad_norm": 3.9635469913482666, "learning_rate": 9.970859603371883e-06, "loss": 1.4918, "step": 1499 }, { "epoch": 0.41379310344827586, "grad_norm": 3.893720865249634, "learning_rate": 9.970761169080734e-06, "loss": 1.2446, "step": 1500 }, { "epoch": 0.41379310344827586, "eval_loss": 1.3262115716934204, "eval_runtime": 11.1925, "eval_samples_per_second": 35.738, "eval_steps_per_second": 4.467, "step": 1500 }, { "epoch": 0.41406896551724137, "grad_norm": 3.775925874710083, "learning_rate": 9.970662569305117e-06, "loss": 1.3551, "step": 1501 }, { "epoch": 0.4143448275862069, "grad_norm": 3.460024833679199, "learning_rate": 9.970563804048315e-06, "loss": 1.1033, "step": 1502 }, { "epoch": 0.4146206896551724, "grad_norm": 4.711709022521973, "learning_rate": 9.970464873313619e-06, "loss": 1.4454, "step": 1503 }, { "epoch": 0.4148965517241379, "grad_norm": 4.139220237731934, "learning_rate": 9.97036577710432e-06, "loss": 1.2786, "step": 1504 }, { "epoch": 0.41517241379310343, "grad_norm": 3.9773221015930176, "learning_rate": 9.970266515423718e-06, "loss": 1.3902, "step": 1505 }, { "epoch": 0.41544827586206895, "grad_norm": 4.159066677093506, "learning_rate": 9.970167088275115e-06, "loss": 1.2748, "step": 1506 }, { "epoch": 0.41572413793103447, "grad_norm": 4.5019636154174805, "learning_rate": 9.970067495661827e-06, "loss": 1.3319, "step": 1507 }, { "epoch": 0.416, "grad_norm": 3.9573256969451904, "learning_rate": 9.969967737587164e-06, "loss": 1.3855, "step": 1508 }, { "epoch": 0.4162758620689655, "grad_norm": 4.135264873504639, "learning_rate": 9.969867814054449e-06, "loss": 1.3335, "step": 1509 }, { "epoch": 0.416551724137931, "grad_norm": 4.012608528137207, "learning_rate": 9.969767725067008e-06, "loss": 1.314, "step": 1510 }, { "epoch": 0.4168275862068965, "grad_norm": 4.013237953186035, "learning_rate": 9.969667470628175e-06, "loss": 1.3419, "step": 1511 }, { "epoch": 0.41710344827586204, "grad_norm": 4.125654220581055, "learning_rate": 9.969567050741287e-06, "loss": 1.3153, "step": 1512 }, { "epoch": 0.41737931034482756, "grad_norm": 4.357562065124512, "learning_rate": 9.969466465409685e-06, "loss": 1.2977, "step": 1513 }, { "epoch": 0.41765517241379313, "grad_norm": 4.284656524658203, "learning_rate": 9.969365714636721e-06, "loss": 1.2542, "step": 1514 }, { "epoch": 0.41793103448275865, "grad_norm": 3.765967845916748, "learning_rate": 9.969264798425746e-06, "loss": 1.225, "step": 1515 }, { "epoch": 0.41820689655172416, "grad_norm": 3.8797106742858887, "learning_rate": 9.969163716780122e-06, "loss": 1.2604, "step": 1516 }, { "epoch": 0.4184827586206897, "grad_norm": 3.5552892684936523, "learning_rate": 9.969062469703212e-06, "loss": 1.2427, "step": 1517 }, { "epoch": 0.4187586206896552, "grad_norm": 4.0713934898376465, "learning_rate": 9.968961057198387e-06, "loss": 1.3247, "step": 1518 }, { "epoch": 0.4190344827586207, "grad_norm": 4.111432075500488, "learning_rate": 9.968859479269026e-06, "loss": 1.3042, "step": 1519 }, { "epoch": 0.4193103448275862, "grad_norm": 3.6520583629608154, "learning_rate": 9.968757735918509e-06, "loss": 1.2723, "step": 1520 }, { "epoch": 0.41958620689655174, "grad_norm": 3.7735445499420166, "learning_rate": 9.968655827150223e-06, "loss": 1.3423, "step": 1521 }, { "epoch": 0.41986206896551725, "grad_norm": 4.165539264678955, "learning_rate": 9.968553752967558e-06, "loss": 1.4018, "step": 1522 }, { "epoch": 0.42013793103448277, "grad_norm": 4.1931376457214355, "learning_rate": 9.968451513373915e-06, "loss": 1.4165, "step": 1523 }, { "epoch": 0.4204137931034483, "grad_norm": 3.8102803230285645, "learning_rate": 9.9683491083727e-06, "loss": 1.0955, "step": 1524 }, { "epoch": 0.4206896551724138, "grad_norm": 3.989250421524048, "learning_rate": 9.96824653796732e-06, "loss": 1.2912, "step": 1525 }, { "epoch": 0.4209655172413793, "grad_norm": 3.6376702785491943, "learning_rate": 9.968143802161188e-06, "loss": 1.1686, "step": 1526 }, { "epoch": 0.42124137931034483, "grad_norm": 3.90461802482605, "learning_rate": 9.968040900957726e-06, "loss": 1.2394, "step": 1527 }, { "epoch": 0.42151724137931035, "grad_norm": 3.9568049907684326, "learning_rate": 9.96793783436036e-06, "loss": 1.2687, "step": 1528 }, { "epoch": 0.42179310344827586, "grad_norm": 3.8974220752716064, "learning_rate": 9.96783460237252e-06, "loss": 1.2885, "step": 1529 }, { "epoch": 0.4220689655172414, "grad_norm": 4.023011684417725, "learning_rate": 9.967731204997643e-06, "loss": 1.252, "step": 1530 }, { "epoch": 0.4223448275862069, "grad_norm": 3.98040771484375, "learning_rate": 9.967627642239173e-06, "loss": 1.3911, "step": 1531 }, { "epoch": 0.4226206896551724, "grad_norm": 4.345715045928955, "learning_rate": 9.967523914100556e-06, "loss": 1.3882, "step": 1532 }, { "epoch": 0.4228965517241379, "grad_norm": 3.7053263187408447, "learning_rate": 9.967420020585245e-06, "loss": 1.0748, "step": 1533 }, { "epoch": 0.42317241379310344, "grad_norm": 4.296879291534424, "learning_rate": 9.967315961696702e-06, "loss": 1.2748, "step": 1534 }, { "epoch": 0.42344827586206896, "grad_norm": 4.068666934967041, "learning_rate": 9.967211737438387e-06, "loss": 1.3973, "step": 1535 }, { "epoch": 0.4237241379310345, "grad_norm": 3.873288154602051, "learning_rate": 9.967107347813772e-06, "loss": 1.3295, "step": 1536 }, { "epoch": 0.424, "grad_norm": 3.880908250808716, "learning_rate": 9.96700279282633e-06, "loss": 1.343, "step": 1537 }, { "epoch": 0.4242758620689655, "grad_norm": 4.007813930511475, "learning_rate": 9.966898072479546e-06, "loss": 1.3739, "step": 1538 }, { "epoch": 0.424551724137931, "grad_norm": 4.053515434265137, "learning_rate": 9.966793186776904e-06, "loss": 1.3053, "step": 1539 }, { "epoch": 0.42482758620689653, "grad_norm": 4.088168621063232, "learning_rate": 9.966688135721896e-06, "loss": 1.3964, "step": 1540 }, { "epoch": 0.42510344827586205, "grad_norm": 4.017636299133301, "learning_rate": 9.966582919318017e-06, "loss": 1.1823, "step": 1541 }, { "epoch": 0.42537931034482757, "grad_norm": 3.966434955596924, "learning_rate": 9.966477537568775e-06, "loss": 1.3769, "step": 1542 }, { "epoch": 0.4256551724137931, "grad_norm": 4.096083641052246, "learning_rate": 9.966371990477673e-06, "loss": 1.2632, "step": 1543 }, { "epoch": 0.4259310344827586, "grad_norm": 3.6247799396514893, "learning_rate": 9.96626627804823e-06, "loss": 1.3011, "step": 1544 }, { "epoch": 0.4262068965517241, "grad_norm": 4.314337730407715, "learning_rate": 9.96616040028396e-06, "loss": 1.4381, "step": 1545 }, { "epoch": 0.42648275862068963, "grad_norm": 4.0076904296875, "learning_rate": 9.96605435718839e-06, "loss": 1.3194, "step": 1546 }, { "epoch": 0.4267586206896552, "grad_norm": 4.305041313171387, "learning_rate": 9.96594814876505e-06, "loss": 1.4816, "step": 1547 }, { "epoch": 0.4270344827586207, "grad_norm": 3.94523549079895, "learning_rate": 9.96584177501748e-06, "loss": 1.3126, "step": 1548 }, { "epoch": 0.42731034482758623, "grad_norm": 3.847972869873047, "learning_rate": 9.965735235949214e-06, "loss": 1.1957, "step": 1549 }, { "epoch": 0.42758620689655175, "grad_norm": 4.023362159729004, "learning_rate": 9.965628531563806e-06, "loss": 1.3269, "step": 1550 }, { "epoch": 0.42786206896551726, "grad_norm": 3.853086233139038, "learning_rate": 9.965521661864803e-06, "loss": 1.391, "step": 1551 }, { "epoch": 0.4281379310344828, "grad_norm": 3.9172849655151367, "learning_rate": 9.965414626855765e-06, "loss": 1.2645, "step": 1552 }, { "epoch": 0.4284137931034483, "grad_norm": 4.231046199798584, "learning_rate": 9.965307426540256e-06, "loss": 1.3951, "step": 1553 }, { "epoch": 0.4286896551724138, "grad_norm": 4.0013203620910645, "learning_rate": 9.965200060921844e-06, "loss": 1.2671, "step": 1554 }, { "epoch": 0.4289655172413793, "grad_norm": 3.7605698108673096, "learning_rate": 9.965092530004104e-06, "loss": 1.2827, "step": 1555 }, { "epoch": 0.42924137931034484, "grad_norm": 4.07454252243042, "learning_rate": 9.964984833790615e-06, "loss": 1.4523, "step": 1556 }, { "epoch": 0.42951724137931035, "grad_norm": 3.884310245513916, "learning_rate": 9.964876972284962e-06, "loss": 1.2923, "step": 1557 }, { "epoch": 0.42979310344827587, "grad_norm": 3.823981761932373, "learning_rate": 9.964768945490738e-06, "loss": 1.2249, "step": 1558 }, { "epoch": 0.4300689655172414, "grad_norm": 3.8999547958374023, "learning_rate": 9.964660753411538e-06, "loss": 1.2023, "step": 1559 }, { "epoch": 0.4303448275862069, "grad_norm": 4.393777847290039, "learning_rate": 9.964552396050963e-06, "loss": 1.3537, "step": 1560 }, { "epoch": 0.4306206896551724, "grad_norm": 3.775014877319336, "learning_rate": 9.964443873412621e-06, "loss": 1.2767, "step": 1561 }, { "epoch": 0.43089655172413793, "grad_norm": 3.936379909515381, "learning_rate": 9.964335185500127e-06, "loss": 1.3288, "step": 1562 }, { "epoch": 0.43117241379310345, "grad_norm": 3.695448398590088, "learning_rate": 9.964226332317095e-06, "loss": 1.2612, "step": 1563 }, { "epoch": 0.43144827586206896, "grad_norm": 3.8401002883911133, "learning_rate": 9.964117313867154e-06, "loss": 1.2449, "step": 1564 }, { "epoch": 0.4317241379310345, "grad_norm": 3.8922557830810547, "learning_rate": 9.964008130153929e-06, "loss": 1.2635, "step": 1565 }, { "epoch": 0.432, "grad_norm": 4.163071632385254, "learning_rate": 9.963898781181058e-06, "loss": 1.4295, "step": 1566 }, { "epoch": 0.4322758620689655, "grad_norm": 4.072781085968018, "learning_rate": 9.96378926695218e-06, "loss": 1.4191, "step": 1567 }, { "epoch": 0.432551724137931, "grad_norm": 3.493251323699951, "learning_rate": 9.96367958747094e-06, "loss": 1.2682, "step": 1568 }, { "epoch": 0.43282758620689654, "grad_norm": 3.8378007411956787, "learning_rate": 9.963569742740992e-06, "loss": 1.3006, "step": 1569 }, { "epoch": 0.43310344827586206, "grad_norm": 4.484254837036133, "learning_rate": 9.96345973276599e-06, "loss": 1.2659, "step": 1570 }, { "epoch": 0.4333793103448276, "grad_norm": 3.56733775138855, "learning_rate": 9.963349557549599e-06, "loss": 1.1792, "step": 1571 }, { "epoch": 0.4336551724137931, "grad_norm": 3.7308082580566406, "learning_rate": 9.963239217095484e-06, "loss": 1.3528, "step": 1572 }, { "epoch": 0.4339310344827586, "grad_norm": 4.172521114349365, "learning_rate": 9.963128711407322e-06, "loss": 1.5901, "step": 1573 }, { "epoch": 0.4342068965517241, "grad_norm": 3.9759113788604736, "learning_rate": 9.963018040488788e-06, "loss": 1.3023, "step": 1574 }, { "epoch": 0.43448275862068964, "grad_norm": 3.9191057682037354, "learning_rate": 9.962907204343571e-06, "loss": 1.2706, "step": 1575 }, { "epoch": 0.43475862068965515, "grad_norm": 4.197622776031494, "learning_rate": 9.962796202975357e-06, "loss": 1.3188, "step": 1576 }, { "epoch": 0.43503448275862067, "grad_norm": 3.9418160915374756, "learning_rate": 9.962685036387842e-06, "loss": 1.3212, "step": 1577 }, { "epoch": 0.4353103448275862, "grad_norm": 3.8923256397247314, "learning_rate": 9.962573704584728e-06, "loss": 1.3356, "step": 1578 }, { "epoch": 0.4355862068965517, "grad_norm": 3.7941455841064453, "learning_rate": 9.962462207569723e-06, "loss": 1.2268, "step": 1579 }, { "epoch": 0.43586206896551727, "grad_norm": 4.096768379211426, "learning_rate": 9.962350545346535e-06, "loss": 1.3509, "step": 1580 }, { "epoch": 0.4361379310344828, "grad_norm": 3.8783316612243652, "learning_rate": 9.962238717918883e-06, "loss": 1.2805, "step": 1581 }, { "epoch": 0.4364137931034483, "grad_norm": 3.7733235359191895, "learning_rate": 9.962126725290491e-06, "loss": 1.2361, "step": 1582 }, { "epoch": 0.4366896551724138, "grad_norm": 3.686678171157837, "learning_rate": 9.962014567465089e-06, "loss": 1.29, "step": 1583 }, { "epoch": 0.43696551724137933, "grad_norm": 4.172106742858887, "learning_rate": 9.961902244446405e-06, "loss": 1.1948, "step": 1584 }, { "epoch": 0.43724137931034485, "grad_norm": 3.821059226989746, "learning_rate": 9.961789756238183e-06, "loss": 1.3792, "step": 1585 }, { "epoch": 0.43751724137931036, "grad_norm": 4.316245079040527, "learning_rate": 9.961677102844167e-06, "loss": 1.309, "step": 1586 }, { "epoch": 0.4377931034482759, "grad_norm": 3.8242526054382324, "learning_rate": 9.961564284268109e-06, "loss": 1.2739, "step": 1587 }, { "epoch": 0.4380689655172414, "grad_norm": 3.914832353591919, "learning_rate": 9.961451300513761e-06, "loss": 1.2287, "step": 1588 }, { "epoch": 0.4383448275862069, "grad_norm": 3.8141603469848633, "learning_rate": 9.961338151584887e-06, "loss": 1.2258, "step": 1589 }, { "epoch": 0.4386206896551724, "grad_norm": 4.229013442993164, "learning_rate": 9.961224837485256e-06, "loss": 1.2943, "step": 1590 }, { "epoch": 0.43889655172413794, "grad_norm": 3.5892882347106934, "learning_rate": 9.961111358218636e-06, "loss": 1.1404, "step": 1591 }, { "epoch": 0.43917241379310346, "grad_norm": 4.1906232833862305, "learning_rate": 9.960997713788807e-06, "loss": 1.3735, "step": 1592 }, { "epoch": 0.43944827586206897, "grad_norm": 3.947200059890747, "learning_rate": 9.960883904199551e-06, "loss": 1.3397, "step": 1593 }, { "epoch": 0.4397241379310345, "grad_norm": 3.873710870742798, "learning_rate": 9.96076992945466e-06, "loss": 1.3455, "step": 1594 }, { "epoch": 0.44, "grad_norm": 3.9288723468780518, "learning_rate": 9.960655789557926e-06, "loss": 1.2867, "step": 1595 }, { "epoch": 0.4402758620689655, "grad_norm": 4.232555866241455, "learning_rate": 9.96054148451315e-06, "loss": 1.2701, "step": 1596 }, { "epoch": 0.44055172413793103, "grad_norm": 3.5656495094299316, "learning_rate": 9.960427014324136e-06, "loss": 1.1796, "step": 1597 }, { "epoch": 0.44082758620689655, "grad_norm": 3.9015302658081055, "learning_rate": 9.960312378994697e-06, "loss": 1.1804, "step": 1598 }, { "epoch": 0.44110344827586206, "grad_norm": 3.7226758003234863, "learning_rate": 9.960197578528646e-06, "loss": 1.258, "step": 1599 }, { "epoch": 0.4413793103448276, "grad_norm": 4.059688091278076, "learning_rate": 9.960082612929809e-06, "loss": 1.4399, "step": 1600 }, { "epoch": 0.4416551724137931, "grad_norm": 3.728886127471924, "learning_rate": 9.95996748220201e-06, "loss": 1.3659, "step": 1601 }, { "epoch": 0.4419310344827586, "grad_norm": 4.721867561340332, "learning_rate": 9.959852186349084e-06, "loss": 1.2892, "step": 1602 }, { "epoch": 0.4422068965517241, "grad_norm": 4.087559223175049, "learning_rate": 9.95973672537487e-06, "loss": 1.3457, "step": 1603 }, { "epoch": 0.44248275862068964, "grad_norm": 4.081697940826416, "learning_rate": 9.95962109928321e-06, "loss": 1.2397, "step": 1604 }, { "epoch": 0.44275862068965516, "grad_norm": 3.8664708137512207, "learning_rate": 9.959505308077952e-06, "loss": 1.3327, "step": 1605 }, { "epoch": 0.4430344827586207, "grad_norm": 3.876143455505371, "learning_rate": 9.959389351762954e-06, "loss": 1.3537, "step": 1606 }, { "epoch": 0.4433103448275862, "grad_norm": 4.225246429443359, "learning_rate": 9.959273230342076e-06, "loss": 1.4244, "step": 1607 }, { "epoch": 0.4435862068965517, "grad_norm": 3.868665933609009, "learning_rate": 9.959156943819181e-06, "loss": 1.2367, "step": 1608 }, { "epoch": 0.4438620689655172, "grad_norm": 3.7177350521087646, "learning_rate": 9.959040492198144e-06, "loss": 1.3016, "step": 1609 }, { "epoch": 0.44413793103448274, "grad_norm": 4.100611209869385, "learning_rate": 9.958923875482841e-06, "loss": 1.2083, "step": 1610 }, { "epoch": 0.44441379310344825, "grad_norm": 3.909417152404785, "learning_rate": 9.958807093677152e-06, "loss": 1.3372, "step": 1611 }, { "epoch": 0.44468965517241377, "grad_norm": 3.874610185623169, "learning_rate": 9.958690146784968e-06, "loss": 1.2052, "step": 1612 }, { "epoch": 0.44496551724137934, "grad_norm": 3.8358066082000732, "learning_rate": 9.95857303481018e-06, "loss": 1.4186, "step": 1613 }, { "epoch": 0.44524137931034485, "grad_norm": 4.23676061630249, "learning_rate": 9.958455757756686e-06, "loss": 1.4768, "step": 1614 }, { "epoch": 0.44551724137931037, "grad_norm": 3.813832998275757, "learning_rate": 9.958338315628394e-06, "loss": 1.3103, "step": 1615 }, { "epoch": 0.4457931034482759, "grad_norm": 3.808617353439331, "learning_rate": 9.958220708429211e-06, "loss": 1.2084, "step": 1616 }, { "epoch": 0.4460689655172414, "grad_norm": 3.7833337783813477, "learning_rate": 9.958102936163054e-06, "loss": 1.1886, "step": 1617 }, { "epoch": 0.4463448275862069, "grad_norm": 4.049766540527344, "learning_rate": 9.957984998833843e-06, "loss": 1.3209, "step": 1618 }, { "epoch": 0.44662068965517243, "grad_norm": 4.1628947257995605, "learning_rate": 9.957866896445504e-06, "loss": 1.3774, "step": 1619 }, { "epoch": 0.44689655172413795, "grad_norm": 3.873643636703491, "learning_rate": 9.957748629001968e-06, "loss": 1.1213, "step": 1620 }, { "epoch": 0.44717241379310346, "grad_norm": 4.133866786956787, "learning_rate": 9.957630196507175e-06, "loss": 1.2882, "step": 1621 }, { "epoch": 0.447448275862069, "grad_norm": 4.331132411956787, "learning_rate": 9.957511598965067e-06, "loss": 1.3233, "step": 1622 }, { "epoch": 0.4477241379310345, "grad_norm": 3.9107789993286133, "learning_rate": 9.957392836379589e-06, "loss": 1.1388, "step": 1623 }, { "epoch": 0.448, "grad_norm": 3.8243393898010254, "learning_rate": 9.957273908754698e-06, "loss": 1.1809, "step": 1624 }, { "epoch": 0.4482758620689655, "grad_norm": 4.088597297668457, "learning_rate": 9.957154816094355e-06, "loss": 1.2805, "step": 1625 }, { "epoch": 0.44855172413793104, "grad_norm": 3.622049570083618, "learning_rate": 9.957035558402519e-06, "loss": 1.4102, "step": 1626 }, { "epoch": 0.44882758620689656, "grad_norm": 3.6810271739959717, "learning_rate": 9.956916135683165e-06, "loss": 1.2252, "step": 1627 }, { "epoch": 0.44910344827586207, "grad_norm": 3.8610715866088867, "learning_rate": 9.956796547940269e-06, "loss": 1.3498, "step": 1628 }, { "epoch": 0.4493793103448276, "grad_norm": 3.9787421226501465, "learning_rate": 9.956676795177809e-06, "loss": 1.2726, "step": 1629 }, { "epoch": 0.4496551724137931, "grad_norm": 3.801976442337036, "learning_rate": 9.956556877399771e-06, "loss": 1.1749, "step": 1630 }, { "epoch": 0.4499310344827586, "grad_norm": 3.6723172664642334, "learning_rate": 9.956436794610153e-06, "loss": 1.1634, "step": 1631 }, { "epoch": 0.45020689655172413, "grad_norm": 3.9990382194519043, "learning_rate": 9.95631654681295e-06, "loss": 1.3355, "step": 1632 }, { "epoch": 0.45048275862068965, "grad_norm": 4.1437668800354, "learning_rate": 9.956196134012164e-06, "loss": 1.3649, "step": 1633 }, { "epoch": 0.45075862068965517, "grad_norm": 3.8091957569122314, "learning_rate": 9.956075556211804e-06, "loss": 1.235, "step": 1634 }, { "epoch": 0.4510344827586207, "grad_norm": 3.723400831222534, "learning_rate": 9.955954813415885e-06, "loss": 1.2341, "step": 1635 }, { "epoch": 0.4513103448275862, "grad_norm": 3.955901861190796, "learning_rate": 9.955833905628426e-06, "loss": 1.2481, "step": 1636 }, { "epoch": 0.4515862068965517, "grad_norm": 3.6799089908599854, "learning_rate": 9.955712832853453e-06, "loss": 1.2186, "step": 1637 }, { "epoch": 0.4518620689655172, "grad_norm": 4.307386875152588, "learning_rate": 9.955591595094995e-06, "loss": 1.3148, "step": 1638 }, { "epoch": 0.45213793103448274, "grad_norm": 3.9667253494262695, "learning_rate": 9.95547019235709e-06, "loss": 1.3244, "step": 1639 }, { "epoch": 0.45241379310344826, "grad_norm": 4.349827766418457, "learning_rate": 9.95534862464378e-06, "loss": 1.3767, "step": 1640 }, { "epoch": 0.4526896551724138, "grad_norm": 3.5981667041778564, "learning_rate": 9.955226891959111e-06, "loss": 1.3004, "step": 1641 }, { "epoch": 0.4529655172413793, "grad_norm": 3.3673930168151855, "learning_rate": 9.955104994307137e-06, "loss": 1.0689, "step": 1642 }, { "epoch": 0.4532413793103448, "grad_norm": 3.8902103900909424, "learning_rate": 9.954982931691913e-06, "loss": 1.2318, "step": 1643 }, { "epoch": 0.4535172413793103, "grad_norm": 4.380942344665527, "learning_rate": 9.954860704117505e-06, "loss": 1.247, "step": 1644 }, { "epoch": 0.45379310344827584, "grad_norm": 4.2174601554870605, "learning_rate": 9.954738311587983e-06, "loss": 1.2212, "step": 1645 }, { "epoch": 0.45406896551724135, "grad_norm": 3.7043888568878174, "learning_rate": 9.954615754107421e-06, "loss": 1.3117, "step": 1646 }, { "epoch": 0.4543448275862069, "grad_norm": 4.199812412261963, "learning_rate": 9.9544930316799e-06, "loss": 1.3136, "step": 1647 }, { "epoch": 0.45462068965517244, "grad_norm": 4.418694972991943, "learning_rate": 9.954370144309502e-06, "loss": 1.278, "step": 1648 }, { "epoch": 0.45489655172413795, "grad_norm": 4.1149210929870605, "learning_rate": 9.954247092000322e-06, "loss": 1.2868, "step": 1649 }, { "epoch": 0.45517241379310347, "grad_norm": 3.9137048721313477, "learning_rate": 9.954123874756454e-06, "loss": 1.279, "step": 1650 }, { "epoch": 0.455448275862069, "grad_norm": 4.339657783508301, "learning_rate": 9.954000492582002e-06, "loss": 1.1738, "step": 1651 }, { "epoch": 0.4557241379310345, "grad_norm": 4.027700424194336, "learning_rate": 9.953876945481072e-06, "loss": 1.265, "step": 1652 }, { "epoch": 0.456, "grad_norm": 4.275692939758301, "learning_rate": 9.953753233457778e-06, "loss": 1.3595, "step": 1653 }, { "epoch": 0.45627586206896553, "grad_norm": 3.579664468765259, "learning_rate": 9.953629356516239e-06, "loss": 1.1679, "step": 1654 }, { "epoch": 0.45655172413793105, "grad_norm": 4.011932849884033, "learning_rate": 9.953505314660577e-06, "loss": 1.2969, "step": 1655 }, { "epoch": 0.45682758620689656, "grad_norm": 4.167861461639404, "learning_rate": 9.953381107894925e-06, "loss": 1.3893, "step": 1656 }, { "epoch": 0.4571034482758621, "grad_norm": 3.890826940536499, "learning_rate": 9.953256736223416e-06, "loss": 1.3216, "step": 1657 }, { "epoch": 0.4573793103448276, "grad_norm": 3.9160044193267822, "learning_rate": 9.953132199650188e-06, "loss": 1.2871, "step": 1658 }, { "epoch": 0.4576551724137931, "grad_norm": 3.561673879623413, "learning_rate": 9.953007498179392e-06, "loss": 1.3315, "step": 1659 }, { "epoch": 0.4579310344827586, "grad_norm": 3.6583333015441895, "learning_rate": 9.952882631815177e-06, "loss": 1.2123, "step": 1660 }, { "epoch": 0.45820689655172414, "grad_norm": 4.182065963745117, "learning_rate": 9.9527576005617e-06, "loss": 1.3561, "step": 1661 }, { "epoch": 0.45848275862068966, "grad_norm": 3.8867008686065674, "learning_rate": 9.952632404423124e-06, "loss": 1.2859, "step": 1662 }, { "epoch": 0.45875862068965517, "grad_norm": 4.139673709869385, "learning_rate": 9.952507043403616e-06, "loss": 1.4379, "step": 1663 }, { "epoch": 0.4590344827586207, "grad_norm": 4.13633918762207, "learning_rate": 9.95238151750735e-06, "loss": 1.3335, "step": 1664 }, { "epoch": 0.4593103448275862, "grad_norm": 4.095728397369385, "learning_rate": 9.952255826738504e-06, "loss": 1.1837, "step": 1665 }, { "epoch": 0.4595862068965517, "grad_norm": 3.973524808883667, "learning_rate": 9.952129971101265e-06, "loss": 1.3315, "step": 1666 }, { "epoch": 0.45986206896551723, "grad_norm": 4.098282337188721, "learning_rate": 9.95200395059982e-06, "loss": 1.2833, "step": 1667 }, { "epoch": 0.46013793103448275, "grad_norm": 4.04828405380249, "learning_rate": 9.951877765238368e-06, "loss": 1.2135, "step": 1668 }, { "epoch": 0.46041379310344827, "grad_norm": 4.110475063323975, "learning_rate": 9.951751415021107e-06, "loss": 1.3707, "step": 1669 }, { "epoch": 0.4606896551724138, "grad_norm": 4.308100700378418, "learning_rate": 9.951624899952244e-06, "loss": 1.3125, "step": 1670 }, { "epoch": 0.4609655172413793, "grad_norm": 4.5207438468933105, "learning_rate": 9.951498220035989e-06, "loss": 1.3438, "step": 1671 }, { "epoch": 0.4612413793103448, "grad_norm": 4.388579845428467, "learning_rate": 9.951371375276563e-06, "loss": 1.3126, "step": 1672 }, { "epoch": 0.46151724137931033, "grad_norm": 3.7253189086914062, "learning_rate": 9.951244365678188e-06, "loss": 1.2448, "step": 1673 }, { "epoch": 0.46179310344827584, "grad_norm": 4.079438209533691, "learning_rate": 9.95111719124509e-06, "loss": 1.2287, "step": 1674 }, { "epoch": 0.46206896551724136, "grad_norm": 4.067921161651611, "learning_rate": 9.950989851981505e-06, "loss": 1.382, "step": 1675 }, { "epoch": 0.4623448275862069, "grad_norm": 4.012572288513184, "learning_rate": 9.950862347891672e-06, "loss": 1.3911, "step": 1676 }, { "epoch": 0.4626206896551724, "grad_norm": 3.784644842147827, "learning_rate": 9.950734678979836e-06, "loss": 1.3066, "step": 1677 }, { "epoch": 0.4628965517241379, "grad_norm": 4.050516605377197, "learning_rate": 9.950606845250247e-06, "loss": 1.3381, "step": 1678 }, { "epoch": 0.4631724137931034, "grad_norm": 4.0342817306518555, "learning_rate": 9.95047884670716e-06, "loss": 1.3607, "step": 1679 }, { "epoch": 0.463448275862069, "grad_norm": 3.599045753479004, "learning_rate": 9.950350683354837e-06, "loss": 1.2126, "step": 1680 }, { "epoch": 0.4637241379310345, "grad_norm": 3.819985866546631, "learning_rate": 9.950222355197546e-06, "loss": 1.2092, "step": 1681 }, { "epoch": 0.464, "grad_norm": 4.200806617736816, "learning_rate": 9.950093862239556e-06, "loss": 1.2705, "step": 1682 }, { "epoch": 0.46427586206896554, "grad_norm": 4.05181360244751, "learning_rate": 9.949965204485147e-06, "loss": 1.377, "step": 1683 }, { "epoch": 0.46455172413793105, "grad_norm": 4.386220455169678, "learning_rate": 9.949836381938602e-06, "loss": 1.3119, "step": 1684 }, { "epoch": 0.46482758620689657, "grad_norm": 4.189709663391113, "learning_rate": 9.94970739460421e-06, "loss": 1.2569, "step": 1685 }, { "epoch": 0.4651034482758621, "grad_norm": 4.026418685913086, "learning_rate": 9.949578242486265e-06, "loss": 1.2972, "step": 1686 }, { "epoch": 0.4653793103448276, "grad_norm": 4.0562052726745605, "learning_rate": 9.949448925589064e-06, "loss": 1.3616, "step": 1687 }, { "epoch": 0.4656551724137931, "grad_norm": 3.901693820953369, "learning_rate": 9.949319443916918e-06, "loss": 1.1456, "step": 1688 }, { "epoch": 0.46593103448275863, "grad_norm": 3.9395360946655273, "learning_rate": 9.949189797474133e-06, "loss": 1.0925, "step": 1689 }, { "epoch": 0.46620689655172415, "grad_norm": 3.9562644958496094, "learning_rate": 9.949059986265027e-06, "loss": 1.3209, "step": 1690 }, { "epoch": 0.46648275862068966, "grad_norm": 4.495298385620117, "learning_rate": 9.94893001029392e-06, "loss": 1.5559, "step": 1691 }, { "epoch": 0.4667586206896552, "grad_norm": 4.3758134841918945, "learning_rate": 9.94879986956514e-06, "loss": 1.2853, "step": 1692 }, { "epoch": 0.4670344827586207, "grad_norm": 3.8997302055358887, "learning_rate": 9.94866956408302e-06, "loss": 1.2938, "step": 1693 }, { "epoch": 0.4673103448275862, "grad_norm": 3.9195008277893066, "learning_rate": 9.948539093851899e-06, "loss": 1.2723, "step": 1694 }, { "epoch": 0.4675862068965517, "grad_norm": 3.962388515472412, "learning_rate": 9.948408458876118e-06, "loss": 1.3359, "step": 1695 }, { "epoch": 0.46786206896551724, "grad_norm": 3.591376543045044, "learning_rate": 9.948277659160028e-06, "loss": 1.3533, "step": 1696 }, { "epoch": 0.46813793103448276, "grad_norm": 3.947258949279785, "learning_rate": 9.948146694707981e-06, "loss": 1.1659, "step": 1697 }, { "epoch": 0.4684137931034483, "grad_norm": 4.000836372375488, "learning_rate": 9.948015565524342e-06, "loss": 1.2752, "step": 1698 }, { "epoch": 0.4686896551724138, "grad_norm": 4.376636505126953, "learning_rate": 9.947884271613472e-06, "loss": 1.2046, "step": 1699 }, { "epoch": 0.4689655172413793, "grad_norm": 3.9291155338287354, "learning_rate": 9.947752812979743e-06, "loss": 1.2924, "step": 1700 }, { "epoch": 0.4692413793103448, "grad_norm": 4.103480815887451, "learning_rate": 9.947621189627533e-06, "loss": 1.4386, "step": 1701 }, { "epoch": 0.46951724137931033, "grad_norm": 3.929067373275757, "learning_rate": 9.947489401561221e-06, "loss": 1.2844, "step": 1702 }, { "epoch": 0.46979310344827585, "grad_norm": 4.072146415710449, "learning_rate": 9.947357448785197e-06, "loss": 1.2413, "step": 1703 }, { "epoch": 0.47006896551724137, "grad_norm": 3.8562142848968506, "learning_rate": 9.947225331303854e-06, "loss": 1.4423, "step": 1704 }, { "epoch": 0.4703448275862069, "grad_norm": 3.9811408519744873, "learning_rate": 9.94709304912159e-06, "loss": 1.2289, "step": 1705 }, { "epoch": 0.4706206896551724, "grad_norm": 3.9215612411499023, "learning_rate": 9.946960602242808e-06, "loss": 1.2333, "step": 1706 }, { "epoch": 0.4708965517241379, "grad_norm": 3.846414566040039, "learning_rate": 9.946827990671917e-06, "loss": 1.2868, "step": 1707 }, { "epoch": 0.47117241379310343, "grad_norm": 3.8609907627105713, "learning_rate": 9.946695214413333e-06, "loss": 1.3237, "step": 1708 }, { "epoch": 0.47144827586206894, "grad_norm": 3.9572348594665527, "learning_rate": 9.946562273471477e-06, "loss": 1.2939, "step": 1709 }, { "epoch": 0.47172413793103446, "grad_norm": 3.9606080055236816, "learning_rate": 9.946429167850772e-06, "loss": 1.3513, "step": 1710 }, { "epoch": 0.472, "grad_norm": 3.945857524871826, "learning_rate": 9.946295897555652e-06, "loss": 1.2763, "step": 1711 }, { "epoch": 0.4722758620689655, "grad_norm": 4.121281147003174, "learning_rate": 9.946162462590553e-06, "loss": 1.1602, "step": 1712 }, { "epoch": 0.47255172413793106, "grad_norm": 4.1014723777771, "learning_rate": 9.946028862959917e-06, "loss": 1.4597, "step": 1713 }, { "epoch": 0.4728275862068966, "grad_norm": 4.214315414428711, "learning_rate": 9.945895098668192e-06, "loss": 1.247, "step": 1714 }, { "epoch": 0.4731034482758621, "grad_norm": 3.7865750789642334, "learning_rate": 9.945761169719831e-06, "loss": 1.2439, "step": 1715 }, { "epoch": 0.4733793103448276, "grad_norm": 3.8851170539855957, "learning_rate": 9.945627076119292e-06, "loss": 1.3463, "step": 1716 }, { "epoch": 0.4736551724137931, "grad_norm": 3.914792060852051, "learning_rate": 9.945492817871043e-06, "loss": 1.4251, "step": 1717 }, { "epoch": 0.47393103448275864, "grad_norm": 4.031622886657715, "learning_rate": 9.945358394979547e-06, "loss": 1.3552, "step": 1718 }, { "epoch": 0.47420689655172416, "grad_norm": 3.6903605461120605, "learning_rate": 9.945223807449285e-06, "loss": 1.1428, "step": 1719 }, { "epoch": 0.47448275862068967, "grad_norm": 3.9227051734924316, "learning_rate": 9.945089055284735e-06, "loss": 1.3846, "step": 1720 }, { "epoch": 0.4747586206896552, "grad_norm": 4.150858402252197, "learning_rate": 9.944954138490383e-06, "loss": 1.4039, "step": 1721 }, { "epoch": 0.4750344827586207, "grad_norm": 4.133920192718506, "learning_rate": 9.944819057070722e-06, "loss": 1.2808, "step": 1722 }, { "epoch": 0.4753103448275862, "grad_norm": 4.3659281730651855, "learning_rate": 9.944683811030249e-06, "loss": 1.3467, "step": 1723 }, { "epoch": 0.47558620689655173, "grad_norm": 4.142817974090576, "learning_rate": 9.944548400373464e-06, "loss": 1.3329, "step": 1724 }, { "epoch": 0.47586206896551725, "grad_norm": 4.101214408874512, "learning_rate": 9.944412825104877e-06, "loss": 1.3444, "step": 1725 }, { "epoch": 0.47613793103448276, "grad_norm": 4.103698253631592, "learning_rate": 9.944277085229002e-06, "loss": 1.2404, "step": 1726 }, { "epoch": 0.4764137931034483, "grad_norm": 3.884173631668091, "learning_rate": 9.944141180750357e-06, "loss": 1.2937, "step": 1727 }, { "epoch": 0.4766896551724138, "grad_norm": 3.681948184967041, "learning_rate": 9.944005111673468e-06, "loss": 1.3051, "step": 1728 }, { "epoch": 0.4769655172413793, "grad_norm": 3.5863492488861084, "learning_rate": 9.943868878002863e-06, "loss": 1.2226, "step": 1729 }, { "epoch": 0.4772413793103448, "grad_norm": 3.7611823081970215, "learning_rate": 9.943732479743078e-06, "loss": 1.3183, "step": 1730 }, { "epoch": 0.47751724137931034, "grad_norm": 3.7829055786132812, "learning_rate": 9.943595916898653e-06, "loss": 1.1263, "step": 1731 }, { "epoch": 0.47779310344827586, "grad_norm": 3.816453695297241, "learning_rate": 9.943459189474137e-06, "loss": 1.3229, "step": 1732 }, { "epoch": 0.4780689655172414, "grad_norm": 3.619124412536621, "learning_rate": 9.94332229747408e-06, "loss": 1.3499, "step": 1733 }, { "epoch": 0.4783448275862069, "grad_norm": 4.107172012329102, "learning_rate": 9.94318524090304e-06, "loss": 1.3, "step": 1734 }, { "epoch": 0.4786206896551724, "grad_norm": 4.255190849304199, "learning_rate": 9.943048019765577e-06, "loss": 1.3496, "step": 1735 }, { "epoch": 0.4788965517241379, "grad_norm": 3.703075408935547, "learning_rate": 9.942910634066265e-06, "loss": 1.2407, "step": 1736 }, { "epoch": 0.47917241379310344, "grad_norm": 4.026134967803955, "learning_rate": 9.942773083809673e-06, "loss": 1.2004, "step": 1737 }, { "epoch": 0.47944827586206895, "grad_norm": 3.7916438579559326, "learning_rate": 9.942635369000384e-06, "loss": 1.1608, "step": 1738 }, { "epoch": 0.47972413793103447, "grad_norm": 3.8156189918518066, "learning_rate": 9.942497489642979e-06, "loss": 1.33, "step": 1739 }, { "epoch": 0.48, "grad_norm": 4.521904945373535, "learning_rate": 9.942359445742048e-06, "loss": 1.5368, "step": 1740 }, { "epoch": 0.4802758620689655, "grad_norm": 3.884934425354004, "learning_rate": 9.942221237302193e-06, "loss": 1.3363, "step": 1741 }, { "epoch": 0.480551724137931, "grad_norm": 3.932539701461792, "learning_rate": 9.942082864328008e-06, "loss": 1.2593, "step": 1742 }, { "epoch": 0.48082758620689653, "grad_norm": 4.12330961227417, "learning_rate": 9.941944326824103e-06, "loss": 1.3089, "step": 1743 }, { "epoch": 0.48110344827586204, "grad_norm": 3.9015939235687256, "learning_rate": 9.941805624795089e-06, "loss": 1.2243, "step": 1744 }, { "epoch": 0.48137931034482756, "grad_norm": 3.7423884868621826, "learning_rate": 9.941666758245584e-06, "loss": 1.2017, "step": 1745 }, { "epoch": 0.48165517241379313, "grad_norm": 3.7370965480804443, "learning_rate": 9.941527727180211e-06, "loss": 1.2463, "step": 1746 }, { "epoch": 0.48193103448275865, "grad_norm": 3.7485692501068115, "learning_rate": 9.9413885316036e-06, "loss": 1.2258, "step": 1747 }, { "epoch": 0.48220689655172416, "grad_norm": 3.872584581375122, "learning_rate": 9.941249171520384e-06, "loss": 1.1414, "step": 1748 }, { "epoch": 0.4824827586206897, "grad_norm": 3.9326961040496826, "learning_rate": 9.941109646935201e-06, "loss": 1.2669, "step": 1749 }, { "epoch": 0.4827586206896552, "grad_norm": 3.968630075454712, "learning_rate": 9.940969957852697e-06, "loss": 1.341, "step": 1750 }, { "epoch": 0.4830344827586207, "grad_norm": 4.1917548179626465, "learning_rate": 9.940830104277525e-06, "loss": 1.3458, "step": 1751 }, { "epoch": 0.4833103448275862, "grad_norm": 3.81684947013855, "learning_rate": 9.940690086214338e-06, "loss": 1.3636, "step": 1752 }, { "epoch": 0.48358620689655174, "grad_norm": 4.2337117195129395, "learning_rate": 9.940549903667798e-06, "loss": 1.4437, "step": 1753 }, { "epoch": 0.48386206896551726, "grad_norm": 3.722280263900757, "learning_rate": 9.940409556642572e-06, "loss": 1.3222, "step": 1754 }, { "epoch": 0.48413793103448277, "grad_norm": 3.5696909427642822, "learning_rate": 9.940269045143331e-06, "loss": 1.1889, "step": 1755 }, { "epoch": 0.4844137931034483, "grad_norm": 3.515186309814453, "learning_rate": 9.940128369174755e-06, "loss": 1.1271, "step": 1756 }, { "epoch": 0.4846896551724138, "grad_norm": 3.5995090007781982, "learning_rate": 9.939987528741528e-06, "loss": 1.4182, "step": 1757 }, { "epoch": 0.4849655172413793, "grad_norm": 3.945138692855835, "learning_rate": 9.939846523848336e-06, "loss": 1.1797, "step": 1758 }, { "epoch": 0.48524137931034483, "grad_norm": 3.8496720790863037, "learning_rate": 9.939705354499876e-06, "loss": 1.3361, "step": 1759 }, { "epoch": 0.48551724137931035, "grad_norm": 4.131053924560547, "learning_rate": 9.939564020700845e-06, "loss": 1.3247, "step": 1760 }, { "epoch": 0.48579310344827586, "grad_norm": 3.647158622741699, "learning_rate": 9.93942252245595e-06, "loss": 1.1558, "step": 1761 }, { "epoch": 0.4860689655172414, "grad_norm": 4.3240203857421875, "learning_rate": 9.939280859769902e-06, "loss": 1.3436, "step": 1762 }, { "epoch": 0.4863448275862069, "grad_norm": 3.802649736404419, "learning_rate": 9.939139032647417e-06, "loss": 1.3806, "step": 1763 }, { "epoch": 0.4866206896551724, "grad_norm": 3.7897350788116455, "learning_rate": 9.938997041093215e-06, "loss": 1.214, "step": 1764 }, { "epoch": 0.4868965517241379, "grad_norm": 3.9166693687438965, "learning_rate": 9.938854885112026e-06, "loss": 1.2186, "step": 1765 }, { "epoch": 0.48717241379310344, "grad_norm": 3.8004438877105713, "learning_rate": 9.93871256470858e-06, "loss": 1.1325, "step": 1766 }, { "epoch": 0.48744827586206896, "grad_norm": 3.818784475326538, "learning_rate": 9.938570079887616e-06, "loss": 1.41, "step": 1767 }, { "epoch": 0.4877241379310345, "grad_norm": 3.966099739074707, "learning_rate": 9.938427430653877e-06, "loss": 1.1879, "step": 1768 }, { "epoch": 0.488, "grad_norm": 3.5495331287384033, "learning_rate": 9.938284617012112e-06, "loss": 1.2927, "step": 1769 }, { "epoch": 0.4882758620689655, "grad_norm": 4.437746524810791, "learning_rate": 9.938141638967076e-06, "loss": 1.3943, "step": 1770 }, { "epoch": 0.488551724137931, "grad_norm": 4.230303764343262, "learning_rate": 9.93799849652353e-06, "loss": 1.4753, "step": 1771 }, { "epoch": 0.48882758620689654, "grad_norm": 3.883570671081543, "learning_rate": 9.937855189686239e-06, "loss": 1.2737, "step": 1772 }, { "epoch": 0.48910344827586205, "grad_norm": 3.7267589569091797, "learning_rate": 9.937711718459973e-06, "loss": 1.2781, "step": 1773 }, { "epoch": 0.48937931034482757, "grad_norm": 4.135288715362549, "learning_rate": 9.937568082849508e-06, "loss": 1.4301, "step": 1774 }, { "epoch": 0.4896551724137931, "grad_norm": 3.769199848175049, "learning_rate": 9.937424282859627e-06, "loss": 1.2102, "step": 1775 }, { "epoch": 0.4899310344827586, "grad_norm": 4.088886260986328, "learning_rate": 9.937280318495116e-06, "loss": 1.4152, "step": 1776 }, { "epoch": 0.4902068965517241, "grad_norm": 3.5940587520599365, "learning_rate": 9.93713618976077e-06, "loss": 1.1195, "step": 1777 }, { "epoch": 0.49048275862068963, "grad_norm": 3.5897650718688965, "learning_rate": 9.936991896661385e-06, "loss": 1.1434, "step": 1778 }, { "epoch": 0.49075862068965515, "grad_norm": 3.700078010559082, "learning_rate": 9.936847439201767e-06, "loss": 1.2524, "step": 1779 }, { "epoch": 0.4910344827586207, "grad_norm": 3.9094555377960205, "learning_rate": 9.936702817386723e-06, "loss": 1.3117, "step": 1780 }, { "epoch": 0.49131034482758623, "grad_norm": 4.091640949249268, "learning_rate": 9.936558031221068e-06, "loss": 1.2468, "step": 1781 }, { "epoch": 0.49158620689655175, "grad_norm": 4.03535270690918, "learning_rate": 9.936413080709623e-06, "loss": 1.2454, "step": 1782 }, { "epoch": 0.49186206896551726, "grad_norm": 3.9052388668060303, "learning_rate": 9.936267965857216e-06, "loss": 1.3701, "step": 1783 }, { "epoch": 0.4921379310344828, "grad_norm": 3.882084608078003, "learning_rate": 9.936122686668671e-06, "loss": 1.284, "step": 1784 }, { "epoch": 0.4924137931034483, "grad_norm": 4.1502885818481445, "learning_rate": 9.935977243148834e-06, "loss": 1.293, "step": 1785 }, { "epoch": 0.4926896551724138, "grad_norm": 4.19443416595459, "learning_rate": 9.93583163530254e-06, "loss": 1.4504, "step": 1786 }, { "epoch": 0.4929655172413793, "grad_norm": 3.7967517375946045, "learning_rate": 9.935685863134638e-06, "loss": 1.2466, "step": 1787 }, { "epoch": 0.49324137931034484, "grad_norm": 3.6924374103546143, "learning_rate": 9.935539926649982e-06, "loss": 1.2792, "step": 1788 }, { "epoch": 0.49351724137931036, "grad_norm": 3.6871840953826904, "learning_rate": 9.93539382585343e-06, "loss": 1.2372, "step": 1789 }, { "epoch": 0.49379310344827587, "grad_norm": 3.9754981994628906, "learning_rate": 9.935247560749846e-06, "loss": 1.3536, "step": 1790 }, { "epoch": 0.4940689655172414, "grad_norm": 3.8941051959991455, "learning_rate": 9.9351011313441e-06, "loss": 1.2697, "step": 1791 }, { "epoch": 0.4943448275862069, "grad_norm": 4.273036003112793, "learning_rate": 9.934954537641066e-06, "loss": 1.2182, "step": 1792 }, { "epoch": 0.4946206896551724, "grad_norm": 4.237696170806885, "learning_rate": 9.934807779645624e-06, "loss": 1.2323, "step": 1793 }, { "epoch": 0.49489655172413793, "grad_norm": 3.84759521484375, "learning_rate": 9.934660857362662e-06, "loss": 1.2663, "step": 1794 }, { "epoch": 0.49517241379310345, "grad_norm": 3.777597188949585, "learning_rate": 9.934513770797067e-06, "loss": 1.2733, "step": 1795 }, { "epoch": 0.49544827586206897, "grad_norm": 3.8883955478668213, "learning_rate": 9.93436651995374e-06, "loss": 1.2959, "step": 1796 }, { "epoch": 0.4957241379310345, "grad_norm": 4.020442962646484, "learning_rate": 9.934219104837584e-06, "loss": 1.256, "step": 1797 }, { "epoch": 0.496, "grad_norm": 3.733812093734741, "learning_rate": 9.934071525453502e-06, "loss": 1.1663, "step": 1798 }, { "epoch": 0.4962758620689655, "grad_norm": 3.9261510372161865, "learning_rate": 9.933923781806409e-06, "loss": 1.3205, "step": 1799 }, { "epoch": 0.496551724137931, "grad_norm": 3.829045534133911, "learning_rate": 9.933775873901223e-06, "loss": 1.2569, "step": 1800 }, { "epoch": 0.49682758620689654, "grad_norm": 3.9739208221435547, "learning_rate": 9.933627801742873e-06, "loss": 1.2856, "step": 1801 }, { "epoch": 0.49710344827586206, "grad_norm": 4.238332748413086, "learning_rate": 9.93347956533628e-06, "loss": 1.3913, "step": 1802 }, { "epoch": 0.4973793103448276, "grad_norm": 4.093486309051514, "learning_rate": 9.933331164686388e-06, "loss": 1.4085, "step": 1803 }, { "epoch": 0.4976551724137931, "grad_norm": 3.9186580181121826, "learning_rate": 9.933182599798132e-06, "loss": 1.2241, "step": 1804 }, { "epoch": 0.4979310344827586, "grad_norm": 3.843785524368286, "learning_rate": 9.933033870676459e-06, "loss": 1.2168, "step": 1805 }, { "epoch": 0.4982068965517241, "grad_norm": 3.828706979751587, "learning_rate": 9.93288497732632e-06, "loss": 1.4045, "step": 1806 }, { "epoch": 0.49848275862068964, "grad_norm": 4.153365612030029, "learning_rate": 9.932735919752674e-06, "loss": 1.3731, "step": 1807 }, { "epoch": 0.49875862068965515, "grad_norm": 4.345394134521484, "learning_rate": 9.93258669796048e-06, "loss": 1.3201, "step": 1808 }, { "epoch": 0.49903448275862067, "grad_norm": 4.054732799530029, "learning_rate": 9.932437311954709e-06, "loss": 1.3094, "step": 1809 }, { "epoch": 0.4993103448275862, "grad_norm": 3.900454044342041, "learning_rate": 9.932287761740334e-06, "loss": 1.1949, "step": 1810 }, { "epoch": 0.4995862068965517, "grad_norm": 3.997684955596924, "learning_rate": 9.93213804732233e-06, "loss": 1.3553, "step": 1811 }, { "epoch": 0.4998620689655172, "grad_norm": 3.8673295974731445, "learning_rate": 9.931988168705686e-06, "loss": 1.2368, "step": 1812 }, { "epoch": 0.5001379310344828, "grad_norm": 3.93471097946167, "learning_rate": 9.931838125895389e-06, "loss": 1.3731, "step": 1813 }, { "epoch": 0.5004137931034482, "grad_norm": 3.766634464263916, "learning_rate": 9.931687918896435e-06, "loss": 1.2993, "step": 1814 }, { "epoch": 0.5006896551724138, "grad_norm": 4.45564079284668, "learning_rate": 9.931537547713826e-06, "loss": 1.308, "step": 1815 }, { "epoch": 0.5009655172413793, "grad_norm": 4.0610222816467285, "learning_rate": 9.931387012352565e-06, "loss": 1.2788, "step": 1816 }, { "epoch": 0.5012413793103448, "grad_norm": 3.464578628540039, "learning_rate": 9.931236312817664e-06, "loss": 1.202, "step": 1817 }, { "epoch": 0.5015172413793103, "grad_norm": 3.889246702194214, "learning_rate": 9.931085449114142e-06, "loss": 1.2978, "step": 1818 }, { "epoch": 0.5017931034482759, "grad_norm": 3.6296913623809814, "learning_rate": 9.930934421247023e-06, "loss": 1.2518, "step": 1819 }, { "epoch": 0.5020689655172413, "grad_norm": 3.739145278930664, "learning_rate": 9.93078322922133e-06, "loss": 1.362, "step": 1820 }, { "epoch": 0.5023448275862069, "grad_norm": 3.8967108726501465, "learning_rate": 9.930631873042101e-06, "loss": 1.3446, "step": 1821 }, { "epoch": 0.5026206896551724, "grad_norm": 3.8554553985595703, "learning_rate": 9.930480352714371e-06, "loss": 1.1646, "step": 1822 }, { "epoch": 0.5028965517241379, "grad_norm": 3.886056423187256, "learning_rate": 9.930328668243188e-06, "loss": 1.3473, "step": 1823 }, { "epoch": 0.5031724137931034, "grad_norm": 3.871497392654419, "learning_rate": 9.930176819633598e-06, "loss": 1.3102, "step": 1824 }, { "epoch": 0.503448275862069, "grad_norm": 3.5470547676086426, "learning_rate": 9.930024806890659e-06, "loss": 1.0645, "step": 1825 }, { "epoch": 0.5037241379310344, "grad_norm": 3.8993618488311768, "learning_rate": 9.929872630019433e-06, "loss": 1.2317, "step": 1826 }, { "epoch": 0.504, "grad_norm": 3.642237424850464, "learning_rate": 9.929720289024983e-06, "loss": 1.2604, "step": 1827 }, { "epoch": 0.5042758620689655, "grad_norm": 3.619847059249878, "learning_rate": 9.929567783912381e-06, "loss": 1.2431, "step": 1828 }, { "epoch": 0.504551724137931, "grad_norm": 3.628711462020874, "learning_rate": 9.929415114686707e-06, "loss": 1.2715, "step": 1829 }, { "epoch": 0.5048275862068966, "grad_norm": 3.9714887142181396, "learning_rate": 9.92926228135304e-06, "loss": 1.2679, "step": 1830 }, { "epoch": 0.5051034482758621, "grad_norm": 4.032348155975342, "learning_rate": 9.929109283916471e-06, "loss": 1.2678, "step": 1831 }, { "epoch": 0.5053793103448276, "grad_norm": 3.987300157546997, "learning_rate": 9.928956122382092e-06, "loss": 1.313, "step": 1832 }, { "epoch": 0.5056551724137931, "grad_norm": 4.004826545715332, "learning_rate": 9.928802796755002e-06, "loss": 1.418, "step": 1833 }, { "epoch": 0.5059310344827587, "grad_norm": 3.6888859272003174, "learning_rate": 9.928649307040306e-06, "loss": 1.1347, "step": 1834 }, { "epoch": 0.5062068965517241, "grad_norm": 3.731584310531616, "learning_rate": 9.928495653243115e-06, "loss": 1.3353, "step": 1835 }, { "epoch": 0.5064827586206897, "grad_norm": 3.7170119285583496, "learning_rate": 9.928341835368541e-06, "loss": 1.2585, "step": 1836 }, { "epoch": 0.5067586206896552, "grad_norm": 3.913182020187378, "learning_rate": 9.928187853421707e-06, "loss": 1.3958, "step": 1837 }, { "epoch": 0.5070344827586207, "grad_norm": 4.226877689361572, "learning_rate": 9.92803370740774e-06, "loss": 1.3271, "step": 1838 }, { "epoch": 0.5073103448275862, "grad_norm": 4.021875858306885, "learning_rate": 9.927879397331773e-06, "loss": 1.3172, "step": 1839 }, { "epoch": 0.5075862068965518, "grad_norm": 3.922971725463867, "learning_rate": 9.927724923198938e-06, "loss": 1.2336, "step": 1840 }, { "epoch": 0.5078620689655172, "grad_norm": 4.347784996032715, "learning_rate": 9.927570285014382e-06, "loss": 1.3234, "step": 1841 }, { "epoch": 0.5081379310344828, "grad_norm": 4.064385414123535, "learning_rate": 9.92741548278325e-06, "loss": 1.1782, "step": 1842 }, { "epoch": 0.5084137931034483, "grad_norm": 4.459571838378906, "learning_rate": 9.927260516510701e-06, "loss": 1.2926, "step": 1843 }, { "epoch": 0.5086896551724138, "grad_norm": 3.5360286235809326, "learning_rate": 9.92710538620189e-06, "loss": 1.1616, "step": 1844 }, { "epoch": 0.5089655172413793, "grad_norm": 3.765646457672119, "learning_rate": 9.926950091861982e-06, "loss": 1.1952, "step": 1845 }, { "epoch": 0.5092413793103449, "grad_norm": 3.69584321975708, "learning_rate": 9.926794633496146e-06, "loss": 1.1769, "step": 1846 }, { "epoch": 0.5095172413793103, "grad_norm": 4.141855716705322, "learning_rate": 9.92663901110956e-06, "loss": 1.3794, "step": 1847 }, { "epoch": 0.5097931034482759, "grad_norm": 4.390835285186768, "learning_rate": 9.926483224707401e-06, "loss": 1.5355, "step": 1848 }, { "epoch": 0.5100689655172413, "grad_norm": 3.879450559616089, "learning_rate": 9.92632727429486e-06, "loss": 1.2646, "step": 1849 }, { "epoch": 0.5103448275862069, "grad_norm": 3.8935396671295166, "learning_rate": 9.926171159877127e-06, "loss": 1.2383, "step": 1850 }, { "epoch": 0.5106206896551724, "grad_norm": 3.902341842651367, "learning_rate": 9.9260148814594e-06, "loss": 1.4317, "step": 1851 }, { "epoch": 0.510896551724138, "grad_norm": 3.565821409225464, "learning_rate": 9.925858439046878e-06, "loss": 1.2459, "step": 1852 }, { "epoch": 0.5111724137931034, "grad_norm": 3.7994117736816406, "learning_rate": 9.925701832644775e-06, "loss": 1.46, "step": 1853 }, { "epoch": 0.511448275862069, "grad_norm": 3.884650945663452, "learning_rate": 9.925545062258299e-06, "loss": 1.2744, "step": 1854 }, { "epoch": 0.5117241379310344, "grad_norm": 3.9230220317840576, "learning_rate": 9.925388127892675e-06, "loss": 1.3449, "step": 1855 }, { "epoch": 0.512, "grad_norm": 3.843308687210083, "learning_rate": 9.925231029553121e-06, "loss": 1.2737, "step": 1856 }, { "epoch": 0.5122758620689655, "grad_norm": 4.076361179351807, "learning_rate": 9.925073767244873e-06, "loss": 1.3041, "step": 1857 }, { "epoch": 0.512551724137931, "grad_norm": 3.625561475753784, "learning_rate": 9.924916340973165e-06, "loss": 1.2097, "step": 1858 }, { "epoch": 0.5128275862068965, "grad_norm": 3.947268486022949, "learning_rate": 9.924758750743235e-06, "loss": 1.1881, "step": 1859 }, { "epoch": 0.5131034482758621, "grad_norm": 3.9791269302368164, "learning_rate": 9.924600996560332e-06, "loss": 1.3099, "step": 1860 }, { "epoch": 0.5133793103448275, "grad_norm": 4.158816814422607, "learning_rate": 9.924443078429707e-06, "loss": 1.2958, "step": 1861 }, { "epoch": 0.5136551724137931, "grad_norm": 3.789597749710083, "learning_rate": 9.924284996356616e-06, "loss": 1.4113, "step": 1862 }, { "epoch": 0.5139310344827587, "grad_norm": 4.2323408126831055, "learning_rate": 9.924126750346325e-06, "loss": 1.2986, "step": 1863 }, { "epoch": 0.5142068965517241, "grad_norm": 3.694888114929199, "learning_rate": 9.923968340404103e-06, "loss": 1.2061, "step": 1864 }, { "epoch": 0.5144827586206897, "grad_norm": 4.00763463973999, "learning_rate": 9.923809766535218e-06, "loss": 1.2014, "step": 1865 }, { "epoch": 0.5147586206896552, "grad_norm": 4.018078327178955, "learning_rate": 9.923651028744955e-06, "loss": 1.3075, "step": 1866 }, { "epoch": 0.5150344827586207, "grad_norm": 3.6922125816345215, "learning_rate": 9.923492127038595e-06, "loss": 1.281, "step": 1867 }, { "epoch": 0.5153103448275862, "grad_norm": 3.856982946395874, "learning_rate": 9.92333306142143e-06, "loss": 1.2967, "step": 1868 }, { "epoch": 0.5155862068965518, "grad_norm": 3.7531824111938477, "learning_rate": 9.923173831898753e-06, "loss": 1.2603, "step": 1869 }, { "epoch": 0.5158620689655172, "grad_norm": 3.7249808311462402, "learning_rate": 9.923014438475868e-06, "loss": 1.3588, "step": 1870 }, { "epoch": 0.5161379310344828, "grad_norm": 3.5317881107330322, "learning_rate": 9.922854881158082e-06, "loss": 1.3527, "step": 1871 }, { "epoch": 0.5164137931034483, "grad_norm": 3.8724277019500732, "learning_rate": 9.922695159950702e-06, "loss": 1.3493, "step": 1872 }, { "epoch": 0.5166896551724138, "grad_norm": 4.043336868286133, "learning_rate": 9.922535274859052e-06, "loss": 1.2349, "step": 1873 }, { "epoch": 0.5169655172413793, "grad_norm": 3.729553699493408, "learning_rate": 9.922375225888449e-06, "loss": 1.3411, "step": 1874 }, { "epoch": 0.5172413793103449, "grad_norm": 3.6128990650177, "learning_rate": 9.922215013044223e-06, "loss": 1.2853, "step": 1875 }, { "epoch": 0.5175172413793103, "grad_norm": 4.179464340209961, "learning_rate": 9.922054636331711e-06, "loss": 1.3581, "step": 1876 }, { "epoch": 0.5177931034482759, "grad_norm": 3.771078109741211, "learning_rate": 9.921894095756249e-06, "loss": 1.1582, "step": 1877 }, { "epoch": 0.5180689655172414, "grad_norm": 3.6345877647399902, "learning_rate": 9.921733391323181e-06, "loss": 1.3583, "step": 1878 }, { "epoch": 0.5183448275862069, "grad_norm": 4.095683574676514, "learning_rate": 9.92157252303786e-06, "loss": 1.3256, "step": 1879 }, { "epoch": 0.5186206896551724, "grad_norm": 4.205842971801758, "learning_rate": 9.921411490905639e-06, "loss": 1.2986, "step": 1880 }, { "epoch": 0.518896551724138, "grad_norm": 4.013172626495361, "learning_rate": 9.92125029493188e-06, "loss": 1.431, "step": 1881 }, { "epoch": 0.5191724137931034, "grad_norm": 4.106489181518555, "learning_rate": 9.921088935121949e-06, "loss": 1.2624, "step": 1882 }, { "epoch": 0.519448275862069, "grad_norm": 3.9852402210235596, "learning_rate": 9.92092741148122e-06, "loss": 1.2898, "step": 1883 }, { "epoch": 0.5197241379310344, "grad_norm": 3.781435966491699, "learning_rate": 9.920765724015068e-06, "loss": 1.2162, "step": 1884 }, { "epoch": 0.52, "grad_norm": 3.8954246044158936, "learning_rate": 9.920603872728875e-06, "loss": 1.3416, "step": 1885 }, { "epoch": 0.5202758620689655, "grad_norm": 4.19786262512207, "learning_rate": 9.920441857628033e-06, "loss": 1.3513, "step": 1886 }, { "epoch": 0.520551724137931, "grad_norm": 4.121763706207275, "learning_rate": 9.920279678717934e-06, "loss": 1.3772, "step": 1887 }, { "epoch": 0.5208275862068965, "grad_norm": 4.2546491622924805, "learning_rate": 9.920117336003975e-06, "loss": 1.3723, "step": 1888 }, { "epoch": 0.5211034482758621, "grad_norm": 4.155087471008301, "learning_rate": 9.919954829491563e-06, "loss": 1.309, "step": 1889 }, { "epoch": 0.5213793103448275, "grad_norm": 3.6079745292663574, "learning_rate": 9.919792159186108e-06, "loss": 1.2223, "step": 1890 }, { "epoch": 0.5216551724137931, "grad_norm": 3.911931037902832, "learning_rate": 9.919629325093024e-06, "loss": 1.2092, "step": 1891 }, { "epoch": 0.5219310344827586, "grad_norm": 4.010016918182373, "learning_rate": 9.919466327217732e-06, "loss": 1.269, "step": 1892 }, { "epoch": 0.5222068965517241, "grad_norm": 4.010383605957031, "learning_rate": 9.919303165565664e-06, "loss": 1.2039, "step": 1893 }, { "epoch": 0.5224827586206896, "grad_norm": 3.4968111515045166, "learning_rate": 9.919139840142244e-06, "loss": 1.1964, "step": 1894 }, { "epoch": 0.5227586206896552, "grad_norm": 4.04970645904541, "learning_rate": 9.918976350952913e-06, "loss": 1.2085, "step": 1895 }, { "epoch": 0.5230344827586207, "grad_norm": 3.8984439373016357, "learning_rate": 9.918812698003114e-06, "loss": 1.163, "step": 1896 }, { "epoch": 0.5233103448275862, "grad_norm": 3.7318708896636963, "learning_rate": 9.918648881298294e-06, "loss": 1.2811, "step": 1897 }, { "epoch": 0.5235862068965518, "grad_norm": 4.5523362159729, "learning_rate": 9.91848490084391e-06, "loss": 1.3432, "step": 1898 }, { "epoch": 0.5238620689655172, "grad_norm": 3.9151883125305176, "learning_rate": 9.918320756645417e-06, "loss": 1.3143, "step": 1899 }, { "epoch": 0.5241379310344828, "grad_norm": 3.4611265659332275, "learning_rate": 9.918156448708281e-06, "loss": 1.3225, "step": 1900 }, { "epoch": 0.5244137931034483, "grad_norm": 3.9672563076019287, "learning_rate": 9.917991977037972e-06, "loss": 1.3741, "step": 1901 }, { "epoch": 0.5246896551724138, "grad_norm": 3.805467367172241, "learning_rate": 9.917827341639969e-06, "loss": 1.3195, "step": 1902 }, { "epoch": 0.5249655172413793, "grad_norm": 3.4838078022003174, "learning_rate": 9.917662542519749e-06, "loss": 1.1851, "step": 1903 }, { "epoch": 0.5252413793103449, "grad_norm": 3.56864070892334, "learning_rate": 9.917497579682798e-06, "loss": 1.2095, "step": 1904 }, { "epoch": 0.5255172413793103, "grad_norm": 3.9586074352264404, "learning_rate": 9.917332453134609e-06, "loss": 1.4367, "step": 1905 }, { "epoch": 0.5257931034482759, "grad_norm": 3.6456048488616943, "learning_rate": 9.91716716288068e-06, "loss": 1.2551, "step": 1906 }, { "epoch": 0.5260689655172414, "grad_norm": 3.930614471435547, "learning_rate": 9.917001708926512e-06, "loss": 1.2796, "step": 1907 }, { "epoch": 0.5263448275862069, "grad_norm": 3.6792070865631104, "learning_rate": 9.916836091277617e-06, "loss": 1.3347, "step": 1908 }, { "epoch": 0.5266206896551724, "grad_norm": 3.7320663928985596, "learning_rate": 9.916670309939505e-06, "loss": 1.3684, "step": 1909 }, { "epoch": 0.526896551724138, "grad_norm": 3.7371082305908203, "learning_rate": 9.916504364917698e-06, "loss": 1.3183, "step": 1910 }, { "epoch": 0.5271724137931034, "grad_norm": 3.846198081970215, "learning_rate": 9.916338256217716e-06, "loss": 1.2652, "step": 1911 }, { "epoch": 0.527448275862069, "grad_norm": 4.109994888305664, "learning_rate": 9.916171983845095e-06, "loss": 1.1824, "step": 1912 }, { "epoch": 0.5277241379310345, "grad_norm": 3.9046709537506104, "learning_rate": 9.916005547805365e-06, "loss": 1.2301, "step": 1913 }, { "epoch": 0.528, "grad_norm": 4.201631546020508, "learning_rate": 9.915838948104071e-06, "loss": 1.2989, "step": 1914 }, { "epoch": 0.5282758620689655, "grad_norm": 3.655163049697876, "learning_rate": 9.915672184746757e-06, "loss": 1.3011, "step": 1915 }, { "epoch": 0.5285517241379311, "grad_norm": 3.6855523586273193, "learning_rate": 9.915505257738975e-06, "loss": 1.2595, "step": 1916 }, { "epoch": 0.5288275862068965, "grad_norm": 3.9149909019470215, "learning_rate": 9.915338167086284e-06, "loss": 1.3209, "step": 1917 }, { "epoch": 0.5291034482758621, "grad_norm": 3.6023833751678467, "learning_rate": 9.915170912794245e-06, "loss": 1.2781, "step": 1918 }, { "epoch": 0.5293793103448275, "grad_norm": 3.550968647003174, "learning_rate": 9.915003494868424e-06, "loss": 1.3594, "step": 1919 }, { "epoch": 0.5296551724137931, "grad_norm": 4.0660319328308105, "learning_rate": 9.914835913314401e-06, "loss": 1.4088, "step": 1920 }, { "epoch": 0.5299310344827586, "grad_norm": 3.616358995437622, "learning_rate": 9.91466816813775e-06, "loss": 1.3156, "step": 1921 }, { "epoch": 0.5302068965517241, "grad_norm": 3.4661898612976074, "learning_rate": 9.914500259344058e-06, "loss": 1.2279, "step": 1922 }, { "epoch": 0.5304827586206896, "grad_norm": 3.2031736373901367, "learning_rate": 9.914332186938911e-06, "loss": 1.0948, "step": 1923 }, { "epoch": 0.5307586206896552, "grad_norm": 4.149169921875, "learning_rate": 9.914163950927909e-06, "loss": 1.4223, "step": 1924 }, { "epoch": 0.5310344827586206, "grad_norm": 3.908419132232666, "learning_rate": 9.91399555131665e-06, "loss": 1.3338, "step": 1925 }, { "epoch": 0.5313103448275862, "grad_norm": 3.9962475299835205, "learning_rate": 9.913826988110741e-06, "loss": 1.3851, "step": 1926 }, { "epoch": 0.5315862068965517, "grad_norm": 4.07468318939209, "learning_rate": 9.913658261315796e-06, "loss": 1.2817, "step": 1927 }, { "epoch": 0.5318620689655172, "grad_norm": 3.8140034675598145, "learning_rate": 9.913489370937431e-06, "loss": 1.2763, "step": 1928 }, { "epoch": 0.5321379310344828, "grad_norm": 3.818382740020752, "learning_rate": 9.913320316981266e-06, "loss": 1.2113, "step": 1929 }, { "epoch": 0.5324137931034483, "grad_norm": 3.8398211002349854, "learning_rate": 9.91315109945293e-06, "loss": 1.2972, "step": 1930 }, { "epoch": 0.5326896551724138, "grad_norm": 3.597611665725708, "learning_rate": 9.912981718358058e-06, "loss": 1.2488, "step": 1931 }, { "epoch": 0.5329655172413793, "grad_norm": 3.404496669769287, "learning_rate": 9.912812173702289e-06, "loss": 1.0902, "step": 1932 }, { "epoch": 0.5332413793103449, "grad_norm": 3.8552935123443604, "learning_rate": 9.912642465491266e-06, "loss": 1.3267, "step": 1933 }, { "epoch": 0.5335172413793103, "grad_norm": 3.8306000232696533, "learning_rate": 9.91247259373064e-06, "loss": 1.3415, "step": 1934 }, { "epoch": 0.5337931034482759, "grad_norm": 3.5458600521087646, "learning_rate": 9.912302558426066e-06, "loss": 1.2183, "step": 1935 }, { "epoch": 0.5340689655172414, "grad_norm": 3.5851619243621826, "learning_rate": 9.912132359583205e-06, "loss": 1.1798, "step": 1936 }, { "epoch": 0.5343448275862069, "grad_norm": 3.6277832984924316, "learning_rate": 9.911961997207722e-06, "loss": 1.318, "step": 1937 }, { "epoch": 0.5346206896551724, "grad_norm": 3.8744473457336426, "learning_rate": 9.911791471305289e-06, "loss": 1.1988, "step": 1938 }, { "epoch": 0.534896551724138, "grad_norm": 3.9031941890716553, "learning_rate": 9.911620781881585e-06, "loss": 1.3246, "step": 1939 }, { "epoch": 0.5351724137931034, "grad_norm": 3.7906765937805176, "learning_rate": 9.911449928942292e-06, "loss": 1.1919, "step": 1940 }, { "epoch": 0.535448275862069, "grad_norm": 4.0002899169921875, "learning_rate": 9.911278912493094e-06, "loss": 1.2085, "step": 1941 }, { "epoch": 0.5357241379310345, "grad_norm": 4.250529766082764, "learning_rate": 9.91110773253969e-06, "loss": 1.2888, "step": 1942 }, { "epoch": 0.536, "grad_norm": 3.62980580329895, "learning_rate": 9.910936389087775e-06, "loss": 1.22, "step": 1943 }, { "epoch": 0.5362758620689655, "grad_norm": 3.933102607727051, "learning_rate": 9.910764882143056e-06, "loss": 1.2619, "step": 1944 }, { "epoch": 0.5365517241379311, "grad_norm": 4.096546173095703, "learning_rate": 9.910593211711239e-06, "loss": 1.2841, "step": 1945 }, { "epoch": 0.5368275862068965, "grad_norm": 3.3989107608795166, "learning_rate": 9.910421377798045e-06, "loss": 1.1816, "step": 1946 }, { "epoch": 0.5371034482758621, "grad_norm": 3.7423603534698486, "learning_rate": 9.910249380409188e-06, "loss": 1.2137, "step": 1947 }, { "epoch": 0.5373793103448276, "grad_norm": 3.882366180419922, "learning_rate": 9.910077219550398e-06, "loss": 1.2894, "step": 1948 }, { "epoch": 0.5376551724137931, "grad_norm": 3.746417999267578, "learning_rate": 9.909904895227407e-06, "loss": 1.2393, "step": 1949 }, { "epoch": 0.5379310344827586, "grad_norm": 3.70053768157959, "learning_rate": 9.909732407445948e-06, "loss": 1.4105, "step": 1950 }, { "epoch": 0.5382068965517242, "grad_norm": 3.7024552822113037, "learning_rate": 9.909559756211768e-06, "loss": 1.165, "step": 1951 }, { "epoch": 0.5384827586206896, "grad_norm": 4.0724968910217285, "learning_rate": 9.909386941530613e-06, "loss": 1.3336, "step": 1952 }, { "epoch": 0.5387586206896552, "grad_norm": 3.422889232635498, "learning_rate": 9.909213963408236e-06, "loss": 1.2228, "step": 1953 }, { "epoch": 0.5390344827586206, "grad_norm": 3.890603542327881, "learning_rate": 9.909040821850396e-06, "loss": 1.3091, "step": 1954 }, { "epoch": 0.5393103448275862, "grad_norm": 4.395493507385254, "learning_rate": 9.908867516862856e-06, "loss": 1.3691, "step": 1955 }, { "epoch": 0.5395862068965517, "grad_norm": 4.032992839813232, "learning_rate": 9.908694048451388e-06, "loss": 1.3193, "step": 1956 }, { "epoch": 0.5398620689655172, "grad_norm": 4.048177719116211, "learning_rate": 9.908520416621766e-06, "loss": 1.3369, "step": 1957 }, { "epoch": 0.5401379310344827, "grad_norm": 3.7917425632476807, "learning_rate": 9.908346621379767e-06, "loss": 1.1892, "step": 1958 }, { "epoch": 0.5404137931034483, "grad_norm": 3.682335615158081, "learning_rate": 9.908172662731183e-06, "loss": 1.425, "step": 1959 }, { "epoch": 0.5406896551724137, "grad_norm": 3.886324167251587, "learning_rate": 9.907998540681801e-06, "loss": 1.2291, "step": 1960 }, { "epoch": 0.5409655172413793, "grad_norm": 3.831667423248291, "learning_rate": 9.90782425523742e-06, "loss": 1.0874, "step": 1961 }, { "epoch": 0.5412413793103448, "grad_norm": 3.9399032592773438, "learning_rate": 9.90764980640384e-06, "loss": 1.3094, "step": 1962 }, { "epoch": 0.5415172413793103, "grad_norm": 3.5292985439300537, "learning_rate": 9.907475194186873e-06, "loss": 1.1647, "step": 1963 }, { "epoch": 0.5417931034482759, "grad_norm": 3.8143043518066406, "learning_rate": 9.907300418592326e-06, "loss": 1.2862, "step": 1964 }, { "epoch": 0.5420689655172414, "grad_norm": 3.717121124267578, "learning_rate": 9.907125479626024e-06, "loss": 1.2545, "step": 1965 }, { "epoch": 0.542344827586207, "grad_norm": 3.863797187805176, "learning_rate": 9.906950377293784e-06, "loss": 1.3529, "step": 1966 }, { "epoch": 0.5426206896551724, "grad_norm": 4.140127658843994, "learning_rate": 9.906775111601444e-06, "loss": 1.2615, "step": 1967 }, { "epoch": 0.542896551724138, "grad_norm": 3.9512200355529785, "learning_rate": 9.906599682554829e-06, "loss": 1.3076, "step": 1968 }, { "epoch": 0.5431724137931034, "grad_norm": 3.8936450481414795, "learning_rate": 9.906424090159787e-06, "loss": 1.2887, "step": 1969 }, { "epoch": 0.543448275862069, "grad_norm": 4.310616970062256, "learning_rate": 9.906248334422162e-06, "loss": 1.3085, "step": 1970 }, { "epoch": 0.5437241379310345, "grad_norm": 4.001469135284424, "learning_rate": 9.906072415347803e-06, "loss": 1.3562, "step": 1971 }, { "epoch": 0.544, "grad_norm": 3.995680093765259, "learning_rate": 9.905896332942568e-06, "loss": 1.3052, "step": 1972 }, { "epoch": 0.5442758620689655, "grad_norm": 3.7738378047943115, "learning_rate": 9.905720087212319e-06, "loss": 1.3092, "step": 1973 }, { "epoch": 0.5445517241379311, "grad_norm": 4.223038196563721, "learning_rate": 9.905543678162924e-06, "loss": 1.474, "step": 1974 }, { "epoch": 0.5448275862068965, "grad_norm": 3.613691806793213, "learning_rate": 9.905367105800256e-06, "loss": 1.2965, "step": 1975 }, { "epoch": 0.5451034482758621, "grad_norm": 4.524150848388672, "learning_rate": 9.905190370130192e-06, "loss": 1.3799, "step": 1976 }, { "epoch": 0.5453793103448276, "grad_norm": 4.063584327697754, "learning_rate": 9.905013471158616e-06, "loss": 1.2666, "step": 1977 }, { "epoch": 0.5456551724137931, "grad_norm": 3.7156174182891846, "learning_rate": 9.904836408891419e-06, "loss": 1.3926, "step": 1978 }, { "epoch": 0.5459310344827586, "grad_norm": 3.891862392425537, "learning_rate": 9.904659183334493e-06, "loss": 1.2162, "step": 1979 }, { "epoch": 0.5462068965517242, "grad_norm": 3.737574338912964, "learning_rate": 9.904481794493741e-06, "loss": 1.2555, "step": 1980 }, { "epoch": 0.5464827586206896, "grad_norm": 3.6605944633483887, "learning_rate": 9.904304242375066e-06, "loss": 1.2568, "step": 1981 }, { "epoch": 0.5467586206896552, "grad_norm": 3.887524127960205, "learning_rate": 9.904126526984382e-06, "loss": 1.3907, "step": 1982 }, { "epoch": 0.5470344827586207, "grad_norm": 3.4104461669921875, "learning_rate": 9.903948648327603e-06, "loss": 1.1992, "step": 1983 }, { "epoch": 0.5473103448275862, "grad_norm": 3.804082155227661, "learning_rate": 9.903770606410651e-06, "loss": 1.3446, "step": 1984 }, { "epoch": 0.5475862068965517, "grad_norm": 3.4522531032562256, "learning_rate": 9.903592401239454e-06, "loss": 1.3142, "step": 1985 }, { "epoch": 0.5478620689655173, "grad_norm": 3.935791492462158, "learning_rate": 9.903414032819944e-06, "loss": 1.4515, "step": 1986 }, { "epoch": 0.5481379310344827, "grad_norm": 3.496079683303833, "learning_rate": 9.90323550115806e-06, "loss": 1.1319, "step": 1987 }, { "epoch": 0.5484137931034483, "grad_norm": 3.616636037826538, "learning_rate": 9.903056806259747e-06, "loss": 1.2014, "step": 1988 }, { "epoch": 0.5486896551724137, "grad_norm": 4.061394214630127, "learning_rate": 9.902877948130951e-06, "loss": 1.4221, "step": 1989 }, { "epoch": 0.5489655172413793, "grad_norm": 4.022617340087891, "learning_rate": 9.902698926777627e-06, "loss": 1.3423, "step": 1990 }, { "epoch": 0.5492413793103448, "grad_norm": 4.024367809295654, "learning_rate": 9.902519742205736e-06, "loss": 1.3766, "step": 1991 }, { "epoch": 0.5495172413793104, "grad_norm": 3.912874698638916, "learning_rate": 9.902340394421244e-06, "loss": 1.285, "step": 1992 }, { "epoch": 0.5497931034482758, "grad_norm": 4.113869667053223, "learning_rate": 9.902160883430121e-06, "loss": 1.2067, "step": 1993 }, { "epoch": 0.5500689655172414, "grad_norm": 3.6777703762054443, "learning_rate": 9.901981209238342e-06, "loss": 1.2397, "step": 1994 }, { "epoch": 0.5503448275862068, "grad_norm": 3.774811029434204, "learning_rate": 9.901801371851889e-06, "loss": 1.3353, "step": 1995 }, { "epoch": 0.5506206896551724, "grad_norm": 3.5015437602996826, "learning_rate": 9.901621371276751e-06, "loss": 1.2942, "step": 1996 }, { "epoch": 0.550896551724138, "grad_norm": 4.196155548095703, "learning_rate": 9.901441207518918e-06, "loss": 1.1992, "step": 1997 }, { "epoch": 0.5511724137931034, "grad_norm": 4.054973125457764, "learning_rate": 9.901260880584392e-06, "loss": 1.3442, "step": 1998 }, { "epoch": 0.551448275862069, "grad_norm": 4.06377649307251, "learning_rate": 9.90108039047917e-06, "loss": 1.3825, "step": 1999 }, { "epoch": 0.5517241379310345, "grad_norm": 3.9548678398132324, "learning_rate": 9.900899737209267e-06, "loss": 1.1604, "step": 2000 }, { "epoch": 0.5517241379310345, "eval_loss": 1.3033568859100342, "eval_runtime": 11.6734, "eval_samples_per_second": 34.266, "eval_steps_per_second": 4.283, "step": 2000 }, { "epoch": 0.552, "grad_norm": 4.332686424255371, "learning_rate": 9.900718920780692e-06, "loss": 1.4051, "step": 2001 }, { "epoch": 0.5522758620689655, "grad_norm": 3.775212049484253, "learning_rate": 9.90053794119947e-06, "loss": 1.2439, "step": 2002 }, { "epoch": 0.5525517241379311, "grad_norm": 3.8522465229034424, "learning_rate": 9.90035679847162e-06, "loss": 1.3383, "step": 2003 }, { "epoch": 0.5528275862068965, "grad_norm": 4.358460426330566, "learning_rate": 9.900175492603178e-06, "loss": 1.2876, "step": 2004 }, { "epoch": 0.5531034482758621, "grad_norm": 3.895542860031128, "learning_rate": 9.899994023600178e-06, "loss": 1.3521, "step": 2005 }, { "epoch": 0.5533793103448276, "grad_norm": 4.1610918045043945, "learning_rate": 9.899812391468661e-06, "loss": 1.35, "step": 2006 }, { "epoch": 0.5536551724137931, "grad_norm": 3.793337106704712, "learning_rate": 9.899630596214673e-06, "loss": 1.1664, "step": 2007 }, { "epoch": 0.5539310344827586, "grad_norm": 3.5027658939361572, "learning_rate": 9.89944863784427e-06, "loss": 1.2544, "step": 2008 }, { "epoch": 0.5542068965517242, "grad_norm": 3.5387942790985107, "learning_rate": 9.899266516363505e-06, "loss": 1.2535, "step": 2009 }, { "epoch": 0.5544827586206896, "grad_norm": 3.86106276512146, "learning_rate": 9.899084231778441e-06, "loss": 1.3349, "step": 2010 }, { "epoch": 0.5547586206896552, "grad_norm": 3.554427146911621, "learning_rate": 9.89890178409515e-06, "loss": 1.2013, "step": 2011 }, { "epoch": 0.5550344827586207, "grad_norm": 4.316711902618408, "learning_rate": 9.898719173319705e-06, "loss": 1.508, "step": 2012 }, { "epoch": 0.5553103448275862, "grad_norm": 4.145756721496582, "learning_rate": 9.898536399458185e-06, "loss": 1.2397, "step": 2013 }, { "epoch": 0.5555862068965517, "grad_norm": 3.606245756149292, "learning_rate": 9.898353462516676e-06, "loss": 1.2316, "step": 2014 }, { "epoch": 0.5558620689655173, "grad_norm": 3.748403310775757, "learning_rate": 9.898170362501264e-06, "loss": 1.2008, "step": 2015 }, { "epoch": 0.5561379310344827, "grad_norm": 3.8794751167297363, "learning_rate": 9.897987099418051e-06, "loss": 1.3194, "step": 2016 }, { "epoch": 0.5564137931034483, "grad_norm": 4.013868808746338, "learning_rate": 9.897803673273134e-06, "loss": 1.3451, "step": 2017 }, { "epoch": 0.5566896551724138, "grad_norm": 3.644962787628174, "learning_rate": 9.897620084072619e-06, "loss": 1.3366, "step": 2018 }, { "epoch": 0.5569655172413793, "grad_norm": 3.6595845222473145, "learning_rate": 9.89743633182262e-06, "loss": 1.2139, "step": 2019 }, { "epoch": 0.5572413793103448, "grad_norm": 4.098719120025635, "learning_rate": 9.897252416529254e-06, "loss": 1.4915, "step": 2020 }, { "epoch": 0.5575172413793104, "grad_norm": 3.854229211807251, "learning_rate": 9.897068338198643e-06, "loss": 1.2483, "step": 2021 }, { "epoch": 0.5577931034482758, "grad_norm": 3.6355531215667725, "learning_rate": 9.896884096836916e-06, "loss": 1.2553, "step": 2022 }, { "epoch": 0.5580689655172414, "grad_norm": 4.145900726318359, "learning_rate": 9.896699692450207e-06, "loss": 1.4078, "step": 2023 }, { "epoch": 0.5583448275862068, "grad_norm": 4.017760276794434, "learning_rate": 9.896515125044655e-06, "loss": 1.3197, "step": 2024 }, { "epoch": 0.5586206896551724, "grad_norm": 3.7474067211151123, "learning_rate": 9.896330394626405e-06, "loss": 1.3431, "step": 2025 }, { "epoch": 0.5588965517241379, "grad_norm": 3.648467779159546, "learning_rate": 9.896145501201604e-06, "loss": 1.2348, "step": 2026 }, { "epoch": 0.5591724137931035, "grad_norm": 3.7951695919036865, "learning_rate": 9.895960444776411e-06, "loss": 1.3838, "step": 2027 }, { "epoch": 0.5594482758620689, "grad_norm": 3.655400514602661, "learning_rate": 9.895775225356984e-06, "loss": 1.139, "step": 2028 }, { "epoch": 0.5597241379310345, "grad_norm": 4.054412841796875, "learning_rate": 9.895589842949492e-06, "loss": 1.1861, "step": 2029 }, { "epoch": 0.56, "grad_norm": 3.7889745235443115, "learning_rate": 9.895404297560106e-06, "loss": 1.2831, "step": 2030 }, { "epoch": 0.5602758620689655, "grad_norm": 3.6179428100585938, "learning_rate": 9.895218589195e-06, "loss": 1.2412, "step": 2031 }, { "epoch": 0.5605517241379311, "grad_norm": 4.51393461227417, "learning_rate": 9.89503271786036e-06, "loss": 1.4575, "step": 2032 }, { "epoch": 0.5608275862068965, "grad_norm": 3.8910300731658936, "learning_rate": 9.894846683562373e-06, "loss": 1.268, "step": 2033 }, { "epoch": 0.5611034482758621, "grad_norm": 3.864078998565674, "learning_rate": 9.894660486307232e-06, "loss": 1.3929, "step": 2034 }, { "epoch": 0.5613793103448276, "grad_norm": 3.555663585662842, "learning_rate": 9.894474126101136e-06, "loss": 1.1669, "step": 2035 }, { "epoch": 0.5616551724137931, "grad_norm": 4.005310535430908, "learning_rate": 9.89428760295029e-06, "loss": 1.2062, "step": 2036 }, { "epoch": 0.5619310344827586, "grad_norm": 3.543938398361206, "learning_rate": 9.894100916860902e-06, "loss": 1.16, "step": 2037 }, { "epoch": 0.5622068965517242, "grad_norm": 3.935366630554199, "learning_rate": 9.893914067839189e-06, "loss": 1.1684, "step": 2038 }, { "epoch": 0.5624827586206896, "grad_norm": 3.557985305786133, "learning_rate": 9.89372705589137e-06, "loss": 1.1284, "step": 2039 }, { "epoch": 0.5627586206896552, "grad_norm": 4.288267135620117, "learning_rate": 9.89353988102367e-06, "loss": 1.1706, "step": 2040 }, { "epoch": 0.5630344827586207, "grad_norm": 3.7827718257904053, "learning_rate": 9.893352543242325e-06, "loss": 1.3042, "step": 2041 }, { "epoch": 0.5633103448275862, "grad_norm": 3.829536199569702, "learning_rate": 9.893165042553564e-06, "loss": 1.3714, "step": 2042 }, { "epoch": 0.5635862068965517, "grad_norm": 3.958066701889038, "learning_rate": 9.892977378963637e-06, "loss": 1.2227, "step": 2043 }, { "epoch": 0.5638620689655173, "grad_norm": 3.5735414028167725, "learning_rate": 9.89278955247879e-06, "loss": 1.3131, "step": 2044 }, { "epoch": 0.5641379310344827, "grad_norm": 4.0558929443359375, "learning_rate": 9.89260156310527e-06, "loss": 1.3233, "step": 2045 }, { "epoch": 0.5644137931034483, "grad_norm": 4.196530818939209, "learning_rate": 9.892413410849342e-06, "loss": 1.2389, "step": 2046 }, { "epoch": 0.5646896551724138, "grad_norm": 3.3888604640960693, "learning_rate": 9.892225095717268e-06, "loss": 1.1356, "step": 2047 }, { "epoch": 0.5649655172413793, "grad_norm": 3.6495578289031982, "learning_rate": 9.892036617715317e-06, "loss": 1.2417, "step": 2048 }, { "epoch": 0.5652413793103448, "grad_norm": 4.037240982055664, "learning_rate": 9.891847976849762e-06, "loss": 1.3102, "step": 2049 }, { "epoch": 0.5655172413793104, "grad_norm": 4.046078205108643, "learning_rate": 9.891659173126887e-06, "loss": 1.3193, "step": 2050 }, { "epoch": 0.5657931034482758, "grad_norm": 3.788846015930176, "learning_rate": 9.891470206552975e-06, "loss": 1.3785, "step": 2051 }, { "epoch": 0.5660689655172414, "grad_norm": 3.6972615718841553, "learning_rate": 9.891281077134318e-06, "loss": 1.2341, "step": 2052 }, { "epoch": 0.5663448275862069, "grad_norm": 3.767594814300537, "learning_rate": 9.891091784877208e-06, "loss": 1.2556, "step": 2053 }, { "epoch": 0.5666206896551724, "grad_norm": 3.582402467727661, "learning_rate": 9.890902329787956e-06, "loss": 1.2833, "step": 2054 }, { "epoch": 0.5668965517241379, "grad_norm": 3.6754989624023438, "learning_rate": 9.890712711872862e-06, "loss": 1.3124, "step": 2055 }, { "epoch": 0.5671724137931035, "grad_norm": 3.7833335399627686, "learning_rate": 9.89052293113824e-06, "loss": 1.2982, "step": 2056 }, { "epoch": 0.5674482758620689, "grad_norm": 3.8897101879119873, "learning_rate": 9.89033298759041e-06, "loss": 1.3594, "step": 2057 }, { "epoch": 0.5677241379310345, "grad_norm": 4.108199119567871, "learning_rate": 9.890142881235692e-06, "loss": 1.3022, "step": 2058 }, { "epoch": 0.568, "grad_norm": 3.5733144283294678, "learning_rate": 9.88995261208042e-06, "loss": 1.192, "step": 2059 }, { "epoch": 0.5682758620689655, "grad_norm": 3.834634304046631, "learning_rate": 9.889762180130924e-06, "loss": 1.4681, "step": 2060 }, { "epoch": 0.568551724137931, "grad_norm": 3.641150951385498, "learning_rate": 9.889571585393545e-06, "loss": 1.1847, "step": 2061 }, { "epoch": 0.5688275862068966, "grad_norm": 3.454916477203369, "learning_rate": 9.88938082787463e-06, "loss": 1.1771, "step": 2062 }, { "epoch": 0.5691034482758621, "grad_norm": 3.711855173110962, "learning_rate": 9.889189907580528e-06, "loss": 1.1679, "step": 2063 }, { "epoch": 0.5693793103448276, "grad_norm": 3.979401111602783, "learning_rate": 9.888998824517593e-06, "loss": 1.3618, "step": 2064 }, { "epoch": 0.5696551724137932, "grad_norm": 3.712376832962036, "learning_rate": 9.88880757869219e-06, "loss": 1.3254, "step": 2065 }, { "epoch": 0.5699310344827586, "grad_norm": 3.78497052192688, "learning_rate": 9.888616170110685e-06, "loss": 1.2504, "step": 2066 }, { "epoch": 0.5702068965517242, "grad_norm": 3.6951844692230225, "learning_rate": 9.88842459877945e-06, "loss": 1.2882, "step": 2067 }, { "epoch": 0.5704827586206896, "grad_norm": 3.909163475036621, "learning_rate": 9.888232864704864e-06, "loss": 1.2361, "step": 2068 }, { "epoch": 0.5707586206896552, "grad_norm": 3.5719332695007324, "learning_rate": 9.888040967893306e-06, "loss": 1.2194, "step": 2069 }, { "epoch": 0.5710344827586207, "grad_norm": 4.313850402832031, "learning_rate": 9.887848908351167e-06, "loss": 1.2224, "step": 2070 }, { "epoch": 0.5713103448275862, "grad_norm": 4.45904016494751, "learning_rate": 9.887656686084844e-06, "loss": 1.4008, "step": 2071 }, { "epoch": 0.5715862068965517, "grad_norm": 3.8564882278442383, "learning_rate": 9.88746430110073e-06, "loss": 1.2336, "step": 2072 }, { "epoch": 0.5718620689655173, "grad_norm": 3.9824304580688477, "learning_rate": 9.887271753405237e-06, "loss": 1.1559, "step": 2073 }, { "epoch": 0.5721379310344827, "grad_norm": 3.9313464164733887, "learning_rate": 9.887079043004769e-06, "loss": 1.3015, "step": 2074 }, { "epoch": 0.5724137931034483, "grad_norm": 3.9027602672576904, "learning_rate": 9.886886169905744e-06, "loss": 1.3425, "step": 2075 }, { "epoch": 0.5726896551724138, "grad_norm": 3.8434503078460693, "learning_rate": 9.886693134114586e-06, "loss": 1.2401, "step": 2076 }, { "epoch": 0.5729655172413793, "grad_norm": 3.8335959911346436, "learning_rate": 9.886499935637716e-06, "loss": 1.3383, "step": 2077 }, { "epoch": 0.5732413793103448, "grad_norm": 3.879772186279297, "learning_rate": 9.88630657448157e-06, "loss": 1.5301, "step": 2078 }, { "epoch": 0.5735172413793104, "grad_norm": 3.441643476486206, "learning_rate": 9.886113050652584e-06, "loss": 1.2433, "step": 2079 }, { "epoch": 0.5737931034482758, "grad_norm": 3.4879937171936035, "learning_rate": 9.885919364157201e-06, "loss": 1.1559, "step": 2080 }, { "epoch": 0.5740689655172414, "grad_norm": 3.8398709297180176, "learning_rate": 9.885725515001867e-06, "loss": 1.3487, "step": 2081 }, { "epoch": 0.5743448275862069, "grad_norm": 3.707289695739746, "learning_rate": 9.885531503193038e-06, "loss": 1.2276, "step": 2082 }, { "epoch": 0.5746206896551724, "grad_norm": 3.989063024520874, "learning_rate": 9.885337328737173e-06, "loss": 1.2969, "step": 2083 }, { "epoch": 0.5748965517241379, "grad_norm": 3.6874043941497803, "learning_rate": 9.885142991640734e-06, "loss": 1.3627, "step": 2084 }, { "epoch": 0.5751724137931035, "grad_norm": 3.576540946960449, "learning_rate": 9.884948491910195e-06, "loss": 1.2926, "step": 2085 }, { "epoch": 0.5754482758620689, "grad_norm": 3.917767286300659, "learning_rate": 9.884753829552027e-06, "loss": 1.437, "step": 2086 }, { "epoch": 0.5757241379310345, "grad_norm": 3.9262351989746094, "learning_rate": 9.884559004572714e-06, "loss": 1.29, "step": 2087 }, { "epoch": 0.576, "grad_norm": 4.0742292404174805, "learning_rate": 9.884364016978738e-06, "loss": 1.3688, "step": 2088 }, { "epoch": 0.5762758620689655, "grad_norm": 3.881096601486206, "learning_rate": 9.884168866776594e-06, "loss": 1.2815, "step": 2089 }, { "epoch": 0.576551724137931, "grad_norm": 3.858982563018799, "learning_rate": 9.883973553972777e-06, "loss": 1.2817, "step": 2090 }, { "epoch": 0.5768275862068966, "grad_norm": 3.777719497680664, "learning_rate": 9.88377807857379e-06, "loss": 1.3293, "step": 2091 }, { "epoch": 0.577103448275862, "grad_norm": 3.932751417160034, "learning_rate": 9.88358244058614e-06, "loss": 1.3154, "step": 2092 }, { "epoch": 0.5773793103448276, "grad_norm": 3.809624195098877, "learning_rate": 9.883386640016341e-06, "loss": 1.2222, "step": 2093 }, { "epoch": 0.577655172413793, "grad_norm": 3.6169984340667725, "learning_rate": 9.883190676870911e-06, "loss": 1.1367, "step": 2094 }, { "epoch": 0.5779310344827586, "grad_norm": 4.071712493896484, "learning_rate": 9.882994551156374e-06, "loss": 1.4084, "step": 2095 }, { "epoch": 0.5782068965517242, "grad_norm": 3.6495273113250732, "learning_rate": 9.882798262879261e-06, "loss": 1.1695, "step": 2096 }, { "epoch": 0.5784827586206897, "grad_norm": 4.169198036193848, "learning_rate": 9.882601812046103e-06, "loss": 1.3476, "step": 2097 }, { "epoch": 0.5787586206896552, "grad_norm": 3.66340970993042, "learning_rate": 9.882405198663444e-06, "loss": 1.2594, "step": 2098 }, { "epoch": 0.5790344827586207, "grad_norm": 3.949554920196533, "learning_rate": 9.882208422737825e-06, "loss": 1.2891, "step": 2099 }, { "epoch": 0.5793103448275863, "grad_norm": 3.9280314445495605, "learning_rate": 9.882011484275802e-06, "loss": 1.2419, "step": 2100 }, { "epoch": 0.5795862068965517, "grad_norm": 4.017632484436035, "learning_rate": 9.881814383283929e-06, "loss": 1.3348, "step": 2101 }, { "epoch": 0.5798620689655173, "grad_norm": 4.068256378173828, "learning_rate": 9.88161711976877e-06, "loss": 1.1439, "step": 2102 }, { "epoch": 0.5801379310344827, "grad_norm": 3.88515567779541, "learning_rate": 9.881419693736887e-06, "loss": 1.1741, "step": 2103 }, { "epoch": 0.5804137931034483, "grad_norm": 3.6603658199310303, "learning_rate": 9.881222105194856e-06, "loss": 1.196, "step": 2104 }, { "epoch": 0.5806896551724138, "grad_norm": 3.672152042388916, "learning_rate": 9.881024354149257e-06, "loss": 1.3561, "step": 2105 }, { "epoch": 0.5809655172413793, "grad_norm": 3.635246515274048, "learning_rate": 9.88082644060667e-06, "loss": 1.2862, "step": 2106 }, { "epoch": 0.5812413793103448, "grad_norm": 3.748656988143921, "learning_rate": 9.880628364573685e-06, "loss": 1.2697, "step": 2107 }, { "epoch": 0.5815172413793104, "grad_norm": 3.8105459213256836, "learning_rate": 9.880430126056898e-06, "loss": 1.28, "step": 2108 }, { "epoch": 0.5817931034482758, "grad_norm": 4.159584045410156, "learning_rate": 9.880231725062907e-06, "loss": 1.2801, "step": 2109 }, { "epoch": 0.5820689655172414, "grad_norm": 3.670525074005127, "learning_rate": 9.880033161598315e-06, "loss": 1.3667, "step": 2110 }, { "epoch": 0.5823448275862069, "grad_norm": 4.1216888427734375, "learning_rate": 9.879834435669737e-06, "loss": 1.3413, "step": 2111 }, { "epoch": 0.5826206896551724, "grad_norm": 3.521090269088745, "learning_rate": 9.879635547283786e-06, "loss": 1.2272, "step": 2112 }, { "epoch": 0.5828965517241379, "grad_norm": 3.3463616371154785, "learning_rate": 9.879436496447084e-06, "loss": 1.1559, "step": 2113 }, { "epoch": 0.5831724137931035, "grad_norm": 3.8195526599884033, "learning_rate": 9.879237283166257e-06, "loss": 1.2904, "step": 2114 }, { "epoch": 0.5834482758620689, "grad_norm": 4.151013374328613, "learning_rate": 9.879037907447937e-06, "loss": 1.3185, "step": 2115 }, { "epoch": 0.5837241379310345, "grad_norm": 3.8336124420166016, "learning_rate": 9.878838369298764e-06, "loss": 1.3798, "step": 2116 }, { "epoch": 0.584, "grad_norm": 3.734511375427246, "learning_rate": 9.87863866872538e-06, "loss": 1.2344, "step": 2117 }, { "epoch": 0.5842758620689655, "grad_norm": 3.9928600788116455, "learning_rate": 9.878438805734429e-06, "loss": 1.3736, "step": 2118 }, { "epoch": 0.584551724137931, "grad_norm": 3.930598735809326, "learning_rate": 9.878238780332572e-06, "loss": 1.2161, "step": 2119 }, { "epoch": 0.5848275862068966, "grad_norm": 3.316005229949951, "learning_rate": 9.878038592526462e-06, "loss": 1.1361, "step": 2120 }, { "epoch": 0.585103448275862, "grad_norm": 3.495266914367676, "learning_rate": 9.877838242322768e-06, "loss": 1.0543, "step": 2121 }, { "epoch": 0.5853793103448276, "grad_norm": 3.658935546875, "learning_rate": 9.877637729728157e-06, "loss": 1.257, "step": 2122 }, { "epoch": 0.5856551724137931, "grad_norm": 3.6395838260650635, "learning_rate": 9.877437054749305e-06, "loss": 1.1666, "step": 2123 }, { "epoch": 0.5859310344827586, "grad_norm": 3.4881346225738525, "learning_rate": 9.877236217392894e-06, "loss": 1.1199, "step": 2124 }, { "epoch": 0.5862068965517241, "grad_norm": 3.619462728500366, "learning_rate": 9.877035217665608e-06, "loss": 1.2865, "step": 2125 }, { "epoch": 0.5864827586206897, "grad_norm": 3.969578504562378, "learning_rate": 9.876834055574141e-06, "loss": 1.2581, "step": 2126 }, { "epoch": 0.5867586206896551, "grad_norm": 4.02641487121582, "learning_rate": 9.87663273112519e-06, "loss": 1.1651, "step": 2127 }, { "epoch": 0.5870344827586207, "grad_norm": 3.6376590728759766, "learning_rate": 9.876431244325455e-06, "loss": 1.2594, "step": 2128 }, { "epoch": 0.5873103448275863, "grad_norm": 4.859394550323486, "learning_rate": 9.876229595181646e-06, "loss": 1.1675, "step": 2129 }, { "epoch": 0.5875862068965517, "grad_norm": 3.590315580368042, "learning_rate": 9.876027783700476e-06, "loss": 1.2121, "step": 2130 }, { "epoch": 0.5878620689655173, "grad_norm": 4.539538383483887, "learning_rate": 9.875825809888662e-06, "loss": 1.3228, "step": 2131 }, { "epoch": 0.5881379310344828, "grad_norm": 4.069293022155762, "learning_rate": 9.875623673752927e-06, "loss": 1.1897, "step": 2132 }, { "epoch": 0.5884137931034483, "grad_norm": 3.922513246536255, "learning_rate": 9.875421375300007e-06, "loss": 1.2838, "step": 2133 }, { "epoch": 0.5886896551724138, "grad_norm": 3.86944317817688, "learning_rate": 9.875218914536629e-06, "loss": 1.2226, "step": 2134 }, { "epoch": 0.5889655172413794, "grad_norm": 3.5745339393615723, "learning_rate": 9.875016291469538e-06, "loss": 1.2557, "step": 2135 }, { "epoch": 0.5892413793103448, "grad_norm": 3.744875907897949, "learning_rate": 9.874813506105477e-06, "loss": 1.1497, "step": 2136 }, { "epoch": 0.5895172413793104, "grad_norm": 4.325474262237549, "learning_rate": 9.874610558451198e-06, "loss": 1.2844, "step": 2137 }, { "epoch": 0.5897931034482758, "grad_norm": 3.9217560291290283, "learning_rate": 9.87440744851346e-06, "loss": 1.1953, "step": 2138 }, { "epoch": 0.5900689655172414, "grad_norm": 3.6843984127044678, "learning_rate": 9.874204176299022e-06, "loss": 1.2779, "step": 2139 }, { "epoch": 0.5903448275862069, "grad_norm": 3.6573870182037354, "learning_rate": 9.87400074181465e-06, "loss": 1.3739, "step": 2140 }, { "epoch": 0.5906206896551724, "grad_norm": 4.222965240478516, "learning_rate": 9.873797145067122e-06, "loss": 1.3303, "step": 2141 }, { "epoch": 0.5908965517241379, "grad_norm": 4.0142717361450195, "learning_rate": 9.873593386063208e-06, "loss": 1.2315, "step": 2142 }, { "epoch": 0.5911724137931035, "grad_norm": 3.24295973777771, "learning_rate": 9.873389464809698e-06, "loss": 1.154, "step": 2143 }, { "epoch": 0.5914482758620689, "grad_norm": 4.015010833740234, "learning_rate": 9.87318538131338e-06, "loss": 1.3389, "step": 2144 }, { "epoch": 0.5917241379310345, "grad_norm": 4.0750627517700195, "learning_rate": 9.872981135581045e-06, "loss": 1.3774, "step": 2145 }, { "epoch": 0.592, "grad_norm": 3.676801919937134, "learning_rate": 9.872776727619497e-06, "loss": 1.2637, "step": 2146 }, { "epoch": 0.5922758620689655, "grad_norm": 3.6240739822387695, "learning_rate": 9.872572157435537e-06, "loss": 1.2554, "step": 2147 }, { "epoch": 0.592551724137931, "grad_norm": 3.7029004096984863, "learning_rate": 9.872367425035978e-06, "loss": 1.2732, "step": 2148 }, { "epoch": 0.5928275862068966, "grad_norm": 3.6618831157684326, "learning_rate": 9.872162530427635e-06, "loss": 1.2811, "step": 2149 }, { "epoch": 0.593103448275862, "grad_norm": 3.593695878982544, "learning_rate": 9.87195747361733e-06, "loss": 1.1869, "step": 2150 }, { "epoch": 0.5933793103448276, "grad_norm": 3.6859822273254395, "learning_rate": 9.871752254611888e-06, "loss": 1.4132, "step": 2151 }, { "epoch": 0.5936551724137931, "grad_norm": 3.865025520324707, "learning_rate": 9.871546873418143e-06, "loss": 1.4922, "step": 2152 }, { "epoch": 0.5939310344827586, "grad_norm": 3.8302013874053955, "learning_rate": 9.871341330042932e-06, "loss": 1.3891, "step": 2153 }, { "epoch": 0.5942068965517241, "grad_norm": 4.016648769378662, "learning_rate": 9.871135624493096e-06, "loss": 1.2912, "step": 2154 }, { "epoch": 0.5944827586206897, "grad_norm": 3.6244726181030273, "learning_rate": 9.870929756775485e-06, "loss": 1.2117, "step": 2155 }, { "epoch": 0.5947586206896551, "grad_norm": 4.074698448181152, "learning_rate": 9.870723726896953e-06, "loss": 1.2733, "step": 2156 }, { "epoch": 0.5950344827586207, "grad_norm": 4.112252235412598, "learning_rate": 9.870517534864358e-06, "loss": 1.3701, "step": 2157 }, { "epoch": 0.5953103448275862, "grad_norm": 3.5190844535827637, "learning_rate": 9.870311180684567e-06, "loss": 1.2478, "step": 2158 }, { "epoch": 0.5955862068965517, "grad_norm": 3.4871017932891846, "learning_rate": 9.870104664364444e-06, "loss": 1.2793, "step": 2159 }, { "epoch": 0.5958620689655172, "grad_norm": 3.4859557151794434, "learning_rate": 9.86989798591087e-06, "loss": 1.2081, "step": 2160 }, { "epoch": 0.5961379310344828, "grad_norm": 3.7964541912078857, "learning_rate": 9.869691145330725e-06, "loss": 1.393, "step": 2161 }, { "epoch": 0.5964137931034483, "grad_norm": 3.7112858295440674, "learning_rate": 9.869484142630893e-06, "loss": 1.3687, "step": 2162 }, { "epoch": 0.5966896551724138, "grad_norm": 3.7532198429107666, "learning_rate": 9.869276977818265e-06, "loss": 1.3407, "step": 2163 }, { "epoch": 0.5969655172413794, "grad_norm": 3.708103656768799, "learning_rate": 9.869069650899738e-06, "loss": 1.3196, "step": 2164 }, { "epoch": 0.5972413793103448, "grad_norm": 3.5885696411132812, "learning_rate": 9.868862161882219e-06, "loss": 1.3007, "step": 2165 }, { "epoch": 0.5975172413793104, "grad_norm": 3.728771686553955, "learning_rate": 9.868654510772609e-06, "loss": 1.2173, "step": 2166 }, { "epoch": 0.5977931034482759, "grad_norm": 3.646108627319336, "learning_rate": 9.868446697577825e-06, "loss": 1.1428, "step": 2167 }, { "epoch": 0.5980689655172414, "grad_norm": 3.434239625930786, "learning_rate": 9.868238722304783e-06, "loss": 1.2855, "step": 2168 }, { "epoch": 0.5983448275862069, "grad_norm": 3.6955454349517822, "learning_rate": 9.86803058496041e-06, "loss": 1.2679, "step": 2169 }, { "epoch": 0.5986206896551725, "grad_norm": 3.5592849254608154, "learning_rate": 9.867822285551631e-06, "loss": 1.2254, "step": 2170 }, { "epoch": 0.5988965517241379, "grad_norm": 3.763446569442749, "learning_rate": 9.867613824085386e-06, "loss": 1.381, "step": 2171 }, { "epoch": 0.5991724137931035, "grad_norm": 3.9827306270599365, "learning_rate": 9.86740520056861e-06, "loss": 1.3262, "step": 2172 }, { "epoch": 0.599448275862069, "grad_norm": 3.6004045009613037, "learning_rate": 9.867196415008251e-06, "loss": 1.1133, "step": 2173 }, { "epoch": 0.5997241379310345, "grad_norm": 3.655125141143799, "learning_rate": 9.86698746741126e-06, "loss": 1.3944, "step": 2174 }, { "epoch": 0.6, "grad_norm": 3.6279163360595703, "learning_rate": 9.866778357784589e-06, "loss": 1.3788, "step": 2175 }, { "epoch": 0.6002758620689655, "grad_norm": 4.013826847076416, "learning_rate": 9.866569086135207e-06, "loss": 1.4399, "step": 2176 }, { "epoch": 0.600551724137931, "grad_norm": 3.857469081878662, "learning_rate": 9.866359652470076e-06, "loss": 1.2627, "step": 2177 }, { "epoch": 0.6008275862068966, "grad_norm": 3.6332690715789795, "learning_rate": 9.866150056796168e-06, "loss": 1.1599, "step": 2178 }, { "epoch": 0.601103448275862, "grad_norm": 3.933840036392212, "learning_rate": 9.865940299120464e-06, "loss": 1.3575, "step": 2179 }, { "epoch": 0.6013793103448276, "grad_norm": 3.451317310333252, "learning_rate": 9.865730379449945e-06, "loss": 1.141, "step": 2180 }, { "epoch": 0.6016551724137931, "grad_norm": 3.5626425743103027, "learning_rate": 9.865520297791599e-06, "loss": 1.2952, "step": 2181 }, { "epoch": 0.6019310344827586, "grad_norm": 3.362366199493408, "learning_rate": 9.86531005415242e-06, "loss": 1.1453, "step": 2182 }, { "epoch": 0.6022068965517241, "grad_norm": 4.1837873458862305, "learning_rate": 9.86509964853941e-06, "loss": 1.4185, "step": 2183 }, { "epoch": 0.6024827586206897, "grad_norm": 3.6832728385925293, "learning_rate": 9.86488908095957e-06, "loss": 1.3573, "step": 2184 }, { "epoch": 0.6027586206896551, "grad_norm": 3.719724178314209, "learning_rate": 9.864678351419915e-06, "loss": 1.3789, "step": 2185 }, { "epoch": 0.6030344827586207, "grad_norm": 3.9875481128692627, "learning_rate": 9.864467459927454e-06, "loss": 1.4878, "step": 2186 }, { "epoch": 0.6033103448275862, "grad_norm": 3.554323434829712, "learning_rate": 9.864256406489213e-06, "loss": 1.1659, "step": 2187 }, { "epoch": 0.6035862068965517, "grad_norm": 3.611147880554199, "learning_rate": 9.864045191112218e-06, "loss": 1.1455, "step": 2188 }, { "epoch": 0.6038620689655172, "grad_norm": 3.775747060775757, "learning_rate": 9.863833813803498e-06, "loss": 1.3168, "step": 2189 }, { "epoch": 0.6041379310344828, "grad_norm": 3.5411698818206787, "learning_rate": 9.86362227457009e-06, "loss": 1.2023, "step": 2190 }, { "epoch": 0.6044137931034482, "grad_norm": 3.9304752349853516, "learning_rate": 9.86341057341904e-06, "loss": 1.3673, "step": 2191 }, { "epoch": 0.6046896551724138, "grad_norm": 3.8870387077331543, "learning_rate": 9.863198710357393e-06, "loss": 1.2137, "step": 2192 }, { "epoch": 0.6049655172413793, "grad_norm": 3.6296133995056152, "learning_rate": 9.862986685392204e-06, "loss": 1.3099, "step": 2193 }, { "epoch": 0.6052413793103448, "grad_norm": 3.7292282581329346, "learning_rate": 9.86277449853053e-06, "loss": 1.2022, "step": 2194 }, { "epoch": 0.6055172413793104, "grad_norm": 3.3906826972961426, "learning_rate": 9.862562149779437e-06, "loss": 1.1976, "step": 2195 }, { "epoch": 0.6057931034482759, "grad_norm": 3.9195985794067383, "learning_rate": 9.862349639145991e-06, "loss": 1.2516, "step": 2196 }, { "epoch": 0.6060689655172414, "grad_norm": 3.94577956199646, "learning_rate": 9.86213696663727e-06, "loss": 1.4267, "step": 2197 }, { "epoch": 0.6063448275862069, "grad_norm": 3.9742672443389893, "learning_rate": 9.861924132260354e-06, "loss": 1.3998, "step": 2198 }, { "epoch": 0.6066206896551725, "grad_norm": 3.814790964126587, "learning_rate": 9.861711136022328e-06, "loss": 1.1984, "step": 2199 }, { "epoch": 0.6068965517241379, "grad_norm": 3.6326651573181152, "learning_rate": 9.861497977930282e-06, "loss": 1.246, "step": 2200 }, { "epoch": 0.6071724137931035, "grad_norm": 3.7226383686065674, "learning_rate": 9.861284657991314e-06, "loss": 1.3392, "step": 2201 }, { "epoch": 0.607448275862069, "grad_norm": 3.752480983734131, "learning_rate": 9.861071176212525e-06, "loss": 1.2829, "step": 2202 }, { "epoch": 0.6077241379310345, "grad_norm": 3.423994779586792, "learning_rate": 9.86085753260102e-06, "loss": 1.1879, "step": 2203 }, { "epoch": 0.608, "grad_norm": 3.6108031272888184, "learning_rate": 9.860643727163914e-06, "loss": 1.3539, "step": 2204 }, { "epoch": 0.6082758620689656, "grad_norm": 3.7555043697357178, "learning_rate": 9.860429759908325e-06, "loss": 1.1107, "step": 2205 }, { "epoch": 0.608551724137931, "grad_norm": 3.80519962310791, "learning_rate": 9.860215630841378e-06, "loss": 1.3165, "step": 2206 }, { "epoch": 0.6088275862068966, "grad_norm": 3.3288588523864746, "learning_rate": 9.860001339970197e-06, "loss": 1.2823, "step": 2207 }, { "epoch": 0.609103448275862, "grad_norm": 4.0500969886779785, "learning_rate": 9.859786887301919e-06, "loss": 1.3958, "step": 2208 }, { "epoch": 0.6093793103448276, "grad_norm": 3.654059410095215, "learning_rate": 9.859572272843684e-06, "loss": 1.1696, "step": 2209 }, { "epoch": 0.6096551724137931, "grad_norm": 3.698380470275879, "learning_rate": 9.859357496602635e-06, "loss": 1.2556, "step": 2210 }, { "epoch": 0.6099310344827586, "grad_norm": 3.757591962814331, "learning_rate": 9.859142558585923e-06, "loss": 1.3201, "step": 2211 }, { "epoch": 0.6102068965517241, "grad_norm": 3.8441452980041504, "learning_rate": 9.858927458800704e-06, "loss": 1.3517, "step": 2212 }, { "epoch": 0.6104827586206897, "grad_norm": 4.038410663604736, "learning_rate": 9.85871219725414e-06, "loss": 1.2989, "step": 2213 }, { "epoch": 0.6107586206896551, "grad_norm": 3.5054962635040283, "learning_rate": 9.858496773953394e-06, "loss": 1.2933, "step": 2214 }, { "epoch": 0.6110344827586207, "grad_norm": 3.844771146774292, "learning_rate": 9.858281188905643e-06, "loss": 1.2552, "step": 2215 }, { "epoch": 0.6113103448275862, "grad_norm": 3.4589498043060303, "learning_rate": 9.858065442118058e-06, "loss": 1.1606, "step": 2216 }, { "epoch": 0.6115862068965517, "grad_norm": 3.6349682807922363, "learning_rate": 9.857849533597826e-06, "loss": 1.1925, "step": 2217 }, { "epoch": 0.6118620689655172, "grad_norm": 3.7616970539093018, "learning_rate": 9.857633463352134e-06, "loss": 1.2716, "step": 2218 }, { "epoch": 0.6121379310344828, "grad_norm": 3.9745967388153076, "learning_rate": 9.857417231388174e-06, "loss": 1.3332, "step": 2219 }, { "epoch": 0.6124137931034482, "grad_norm": 3.6141529083251953, "learning_rate": 9.857200837713146e-06, "loss": 1.2904, "step": 2220 }, { "epoch": 0.6126896551724138, "grad_norm": 3.856346845626831, "learning_rate": 9.856984282334254e-06, "loss": 1.1867, "step": 2221 }, { "epoch": 0.6129655172413793, "grad_norm": 3.340731620788574, "learning_rate": 9.856767565258706e-06, "loss": 1.1108, "step": 2222 }, { "epoch": 0.6132413793103448, "grad_norm": 3.8928043842315674, "learning_rate": 9.856550686493722e-06, "loss": 1.3551, "step": 2223 }, { "epoch": 0.6135172413793103, "grad_norm": 3.8641531467437744, "learning_rate": 9.856333646046514e-06, "loss": 1.2492, "step": 2224 }, { "epoch": 0.6137931034482759, "grad_norm": 3.808797597885132, "learning_rate": 9.856116443924314e-06, "loss": 1.2214, "step": 2225 }, { "epoch": 0.6140689655172413, "grad_norm": 3.848687171936035, "learning_rate": 9.85589908013435e-06, "loss": 1.4027, "step": 2226 }, { "epoch": 0.6143448275862069, "grad_norm": 4.18198299407959, "learning_rate": 9.85568155468386e-06, "loss": 1.406, "step": 2227 }, { "epoch": 0.6146206896551724, "grad_norm": 3.669980525970459, "learning_rate": 9.855463867580084e-06, "loss": 1.2494, "step": 2228 }, { "epoch": 0.6148965517241379, "grad_norm": 4.160665035247803, "learning_rate": 9.855246018830272e-06, "loss": 1.42, "step": 2229 }, { "epoch": 0.6151724137931035, "grad_norm": 3.5859618186950684, "learning_rate": 9.855028008441672e-06, "loss": 1.1011, "step": 2230 }, { "epoch": 0.615448275862069, "grad_norm": 3.595515489578247, "learning_rate": 9.854809836421546e-06, "loss": 1.3413, "step": 2231 }, { "epoch": 0.6157241379310345, "grad_norm": 3.76208233833313, "learning_rate": 9.854591502777156e-06, "loss": 1.2571, "step": 2232 }, { "epoch": 0.616, "grad_norm": 3.915081739425659, "learning_rate": 9.85437300751577e-06, "loss": 1.2471, "step": 2233 }, { "epoch": 0.6162758620689656, "grad_norm": 3.587536334991455, "learning_rate": 9.854154350644664e-06, "loss": 1.164, "step": 2234 }, { "epoch": 0.616551724137931, "grad_norm": 3.4271938800811768, "learning_rate": 9.853935532171115e-06, "loss": 1.0858, "step": 2235 }, { "epoch": 0.6168275862068966, "grad_norm": 3.911154270172119, "learning_rate": 9.85371655210241e-06, "loss": 1.3627, "step": 2236 }, { "epoch": 0.617103448275862, "grad_norm": 3.7924225330352783, "learning_rate": 9.853497410445837e-06, "loss": 1.4198, "step": 2237 }, { "epoch": 0.6173793103448276, "grad_norm": 3.808621406555176, "learning_rate": 9.853278107208693e-06, "loss": 1.2445, "step": 2238 }, { "epoch": 0.6176551724137931, "grad_norm": 4.201101303100586, "learning_rate": 9.853058642398279e-06, "loss": 1.358, "step": 2239 }, { "epoch": 0.6179310344827587, "grad_norm": 3.6479079723358154, "learning_rate": 9.852839016021901e-06, "loss": 1.1309, "step": 2240 }, { "epoch": 0.6182068965517241, "grad_norm": 3.772794723510742, "learning_rate": 9.852619228086869e-06, "loss": 1.2784, "step": 2241 }, { "epoch": 0.6184827586206897, "grad_norm": 3.546003580093384, "learning_rate": 9.852399278600503e-06, "loss": 1.2725, "step": 2242 }, { "epoch": 0.6187586206896551, "grad_norm": 3.668354034423828, "learning_rate": 9.852179167570123e-06, "loss": 1.305, "step": 2243 }, { "epoch": 0.6190344827586207, "grad_norm": 3.657787322998047, "learning_rate": 9.851958895003059e-06, "loss": 1.2737, "step": 2244 }, { "epoch": 0.6193103448275862, "grad_norm": 3.2778537273406982, "learning_rate": 9.851738460906644e-06, "loss": 1.1051, "step": 2245 }, { "epoch": 0.6195862068965517, "grad_norm": 4.3092360496521, "learning_rate": 9.851517865288215e-06, "loss": 1.4311, "step": 2246 }, { "epoch": 0.6198620689655172, "grad_norm": 3.8210387229919434, "learning_rate": 9.851297108155118e-06, "loss": 1.3164, "step": 2247 }, { "epoch": 0.6201379310344828, "grad_norm": 3.3689098358154297, "learning_rate": 9.8510761895147e-06, "loss": 1.2192, "step": 2248 }, { "epoch": 0.6204137931034482, "grad_norm": 3.507437229156494, "learning_rate": 9.850855109374318e-06, "loss": 1.2816, "step": 2249 }, { "epoch": 0.6206896551724138, "grad_norm": 3.8544235229492188, "learning_rate": 9.85063386774133e-06, "loss": 1.1946, "step": 2250 }, { "epoch": 0.6209655172413793, "grad_norm": 3.8186326026916504, "learning_rate": 9.850412464623102e-06, "loss": 1.3808, "step": 2251 }, { "epoch": 0.6212413793103448, "grad_norm": 3.7946834564208984, "learning_rate": 9.850190900027007e-06, "loss": 1.2933, "step": 2252 }, { "epoch": 0.6215172413793103, "grad_norm": 4.034345626831055, "learning_rate": 9.849969173960417e-06, "loss": 1.2787, "step": 2253 }, { "epoch": 0.6217931034482759, "grad_norm": 3.9628689289093018, "learning_rate": 9.849747286430717e-06, "loss": 1.3311, "step": 2254 }, { "epoch": 0.6220689655172413, "grad_norm": 3.8513102531433105, "learning_rate": 9.849525237445295e-06, "loss": 1.3227, "step": 2255 }, { "epoch": 0.6223448275862069, "grad_norm": 3.907231330871582, "learning_rate": 9.84930302701154e-06, "loss": 1.2024, "step": 2256 }, { "epoch": 0.6226206896551724, "grad_norm": 4.0941338539123535, "learning_rate": 9.849080655136853e-06, "loss": 1.2932, "step": 2257 }, { "epoch": 0.6228965517241379, "grad_norm": 3.3435802459716797, "learning_rate": 9.848858121828634e-06, "loss": 1.2301, "step": 2258 }, { "epoch": 0.6231724137931034, "grad_norm": 3.8807146549224854, "learning_rate": 9.848635427094293e-06, "loss": 1.3501, "step": 2259 }, { "epoch": 0.623448275862069, "grad_norm": 4.075613498687744, "learning_rate": 9.848412570941243e-06, "loss": 1.4462, "step": 2260 }, { "epoch": 0.6237241379310344, "grad_norm": 3.66688871383667, "learning_rate": 9.848189553376906e-06, "loss": 1.4177, "step": 2261 }, { "epoch": 0.624, "grad_norm": 4.045318126678467, "learning_rate": 9.847966374408702e-06, "loss": 1.3149, "step": 2262 }, { "epoch": 0.6242758620689656, "grad_norm": 4.0286712646484375, "learning_rate": 9.847743034044065e-06, "loss": 1.2701, "step": 2263 }, { "epoch": 0.624551724137931, "grad_norm": 3.765023708343506, "learning_rate": 9.847519532290428e-06, "loss": 1.2955, "step": 2264 }, { "epoch": 0.6248275862068966, "grad_norm": 3.534851551055908, "learning_rate": 9.847295869155233e-06, "loss": 1.241, "step": 2265 }, { "epoch": 0.6251034482758621, "grad_norm": 3.8248291015625, "learning_rate": 9.847072044645927e-06, "loss": 1.2799, "step": 2266 }, { "epoch": 0.6253793103448276, "grad_norm": 4.07189416885376, "learning_rate": 9.84684805876996e-06, "loss": 1.3887, "step": 2267 }, { "epoch": 0.6256551724137931, "grad_norm": 3.7145700454711914, "learning_rate": 9.846623911534787e-06, "loss": 1.3729, "step": 2268 }, { "epoch": 0.6259310344827587, "grad_norm": 3.6537024974823, "learning_rate": 9.846399602947874e-06, "loss": 1.3101, "step": 2269 }, { "epoch": 0.6262068965517241, "grad_norm": 3.7701563835144043, "learning_rate": 9.846175133016686e-06, "loss": 1.2341, "step": 2270 }, { "epoch": 0.6264827586206897, "grad_norm": 3.726518392562866, "learning_rate": 9.845950501748698e-06, "loss": 1.2948, "step": 2271 }, { "epoch": 0.6267586206896552, "grad_norm": 3.684256076812744, "learning_rate": 9.845725709151386e-06, "loss": 1.1443, "step": 2272 }, { "epoch": 0.6270344827586207, "grad_norm": 3.5361850261688232, "learning_rate": 9.845500755232236e-06, "loss": 1.1437, "step": 2273 }, { "epoch": 0.6273103448275862, "grad_norm": 3.7723631858825684, "learning_rate": 9.845275639998736e-06, "loss": 1.3537, "step": 2274 }, { "epoch": 0.6275862068965518, "grad_norm": 3.950538396835327, "learning_rate": 9.845050363458378e-06, "loss": 1.5924, "step": 2275 }, { "epoch": 0.6278620689655172, "grad_norm": 3.567970037460327, "learning_rate": 9.844824925618665e-06, "loss": 1.1922, "step": 2276 }, { "epoch": 0.6281379310344828, "grad_norm": 3.6519103050231934, "learning_rate": 9.844599326487102e-06, "loss": 1.3594, "step": 2277 }, { "epoch": 0.6284137931034482, "grad_norm": 3.7449300289154053, "learning_rate": 9.844373566071198e-06, "loss": 1.3074, "step": 2278 }, { "epoch": 0.6286896551724138, "grad_norm": 3.709765672683716, "learning_rate": 9.844147644378473e-06, "loss": 1.2179, "step": 2279 }, { "epoch": 0.6289655172413793, "grad_norm": 3.5012781620025635, "learning_rate": 9.843921561416441e-06, "loss": 1.1314, "step": 2280 }, { "epoch": 0.6292413793103449, "grad_norm": 3.702927589416504, "learning_rate": 9.843695317192634e-06, "loss": 1.2145, "step": 2281 }, { "epoch": 0.6295172413793103, "grad_norm": 3.59132981300354, "learning_rate": 9.843468911714584e-06, "loss": 1.2775, "step": 2282 }, { "epoch": 0.6297931034482759, "grad_norm": 3.601529598236084, "learning_rate": 9.843242344989827e-06, "loss": 1.237, "step": 2283 }, { "epoch": 0.6300689655172413, "grad_norm": 3.619217872619629, "learning_rate": 9.843015617025904e-06, "loss": 1.2384, "step": 2284 }, { "epoch": 0.6303448275862069, "grad_norm": 4.342235565185547, "learning_rate": 9.842788727830367e-06, "loss": 1.4054, "step": 2285 }, { "epoch": 0.6306206896551724, "grad_norm": 3.8367204666137695, "learning_rate": 9.842561677410767e-06, "loss": 1.1869, "step": 2286 }, { "epoch": 0.6308965517241379, "grad_norm": 3.96085524559021, "learning_rate": 9.842334465774663e-06, "loss": 1.3005, "step": 2287 }, { "epoch": 0.6311724137931034, "grad_norm": 3.683309316635132, "learning_rate": 9.84210709292962e-06, "loss": 1.2348, "step": 2288 }, { "epoch": 0.631448275862069, "grad_norm": 3.825726270675659, "learning_rate": 9.841879558883207e-06, "loss": 1.3656, "step": 2289 }, { "epoch": 0.6317241379310344, "grad_norm": 3.7567427158355713, "learning_rate": 9.841651863643e-06, "loss": 1.2897, "step": 2290 }, { "epoch": 0.632, "grad_norm": 3.831700563430786, "learning_rate": 9.841424007216578e-06, "loss": 1.319, "step": 2291 }, { "epoch": 0.6322758620689655, "grad_norm": 3.736757516860962, "learning_rate": 9.841195989611528e-06, "loss": 1.2646, "step": 2292 }, { "epoch": 0.632551724137931, "grad_norm": 3.8180506229400635, "learning_rate": 9.840967810835437e-06, "loss": 1.1564, "step": 2293 }, { "epoch": 0.6328275862068965, "grad_norm": 3.642453193664551, "learning_rate": 9.840739470895907e-06, "loss": 1.1238, "step": 2294 }, { "epoch": 0.6331034482758621, "grad_norm": 3.8250749111175537, "learning_rate": 9.840510969800538e-06, "loss": 1.4434, "step": 2295 }, { "epoch": 0.6333793103448276, "grad_norm": 4.018590927124023, "learning_rate": 9.840282307556937e-06, "loss": 1.3889, "step": 2296 }, { "epoch": 0.6336551724137931, "grad_norm": 3.4589223861694336, "learning_rate": 9.840053484172717e-06, "loss": 1.2254, "step": 2297 }, { "epoch": 0.6339310344827587, "grad_norm": 3.4353809356689453, "learning_rate": 9.839824499655494e-06, "loss": 1.1293, "step": 2298 }, { "epoch": 0.6342068965517241, "grad_norm": 4.0255632400512695, "learning_rate": 9.839595354012891e-06, "loss": 1.2243, "step": 2299 }, { "epoch": 0.6344827586206897, "grad_norm": 3.6208603382110596, "learning_rate": 9.839366047252539e-06, "loss": 1.3791, "step": 2300 }, { "epoch": 0.6347586206896552, "grad_norm": 3.7878987789154053, "learning_rate": 9.839136579382073e-06, "loss": 1.2502, "step": 2301 }, { "epoch": 0.6350344827586207, "grad_norm": 3.5406723022460938, "learning_rate": 9.838906950409129e-06, "loss": 1.2251, "step": 2302 }, { "epoch": 0.6353103448275862, "grad_norm": 3.975158214569092, "learning_rate": 9.838677160341351e-06, "loss": 1.439, "step": 2303 }, { "epoch": 0.6355862068965518, "grad_norm": 3.8303191661834717, "learning_rate": 9.838447209186393e-06, "loss": 1.2431, "step": 2304 }, { "epoch": 0.6358620689655172, "grad_norm": 3.969348669052124, "learning_rate": 9.83821709695191e-06, "loss": 1.3941, "step": 2305 }, { "epoch": 0.6361379310344828, "grad_norm": 3.8306849002838135, "learning_rate": 9.837986823645558e-06, "loss": 1.2166, "step": 2306 }, { "epoch": 0.6364137931034483, "grad_norm": 3.826780080795288, "learning_rate": 9.837756389275008e-06, "loss": 1.1396, "step": 2307 }, { "epoch": 0.6366896551724138, "grad_norm": 4.122424602508545, "learning_rate": 9.83752579384793e-06, "loss": 1.3493, "step": 2308 }, { "epoch": 0.6369655172413793, "grad_norm": 3.786207914352417, "learning_rate": 9.837295037372001e-06, "loss": 1.3936, "step": 2309 }, { "epoch": 0.6372413793103449, "grad_norm": 3.4695146083831787, "learning_rate": 9.837064119854904e-06, "loss": 1.2786, "step": 2310 }, { "epoch": 0.6375172413793103, "grad_norm": 3.8438522815704346, "learning_rate": 9.836833041304326e-06, "loss": 1.2965, "step": 2311 }, { "epoch": 0.6377931034482759, "grad_norm": 3.5109753608703613, "learning_rate": 9.836601801727958e-06, "loss": 1.331, "step": 2312 }, { "epoch": 0.6380689655172413, "grad_norm": 3.6077845096588135, "learning_rate": 9.836370401133502e-06, "loss": 1.246, "step": 2313 }, { "epoch": 0.6383448275862069, "grad_norm": 3.6733317375183105, "learning_rate": 9.836138839528657e-06, "loss": 1.1424, "step": 2314 }, { "epoch": 0.6386206896551724, "grad_norm": 3.5930769443511963, "learning_rate": 9.835907116921137e-06, "loss": 1.3428, "step": 2315 }, { "epoch": 0.638896551724138, "grad_norm": 3.787122964859009, "learning_rate": 9.835675233318654e-06, "loss": 1.2973, "step": 2316 }, { "epoch": 0.6391724137931034, "grad_norm": 3.9438726902008057, "learning_rate": 9.835443188728927e-06, "loss": 1.45, "step": 2317 }, { "epoch": 0.639448275862069, "grad_norm": 3.714207410812378, "learning_rate": 9.835210983159683e-06, "loss": 1.337, "step": 2318 }, { "epoch": 0.6397241379310344, "grad_norm": 3.9469423294067383, "learning_rate": 9.834978616618652e-06, "loss": 1.2828, "step": 2319 }, { "epoch": 0.64, "grad_norm": 3.4623799324035645, "learning_rate": 9.834746089113568e-06, "loss": 1.1354, "step": 2320 }, { "epoch": 0.6402758620689655, "grad_norm": 3.8109354972839355, "learning_rate": 9.834513400652175e-06, "loss": 1.2314, "step": 2321 }, { "epoch": 0.640551724137931, "grad_norm": 3.6837856769561768, "learning_rate": 9.834280551242217e-06, "loss": 1.3022, "step": 2322 }, { "epoch": 0.6408275862068965, "grad_norm": 3.7267189025878906, "learning_rate": 9.834047540891448e-06, "loss": 1.2883, "step": 2323 }, { "epoch": 0.6411034482758621, "grad_norm": 3.881258726119995, "learning_rate": 9.833814369607623e-06, "loss": 1.2999, "step": 2324 }, { "epoch": 0.6413793103448275, "grad_norm": 3.913843870162964, "learning_rate": 9.833581037398509e-06, "loss": 1.2836, "step": 2325 }, { "epoch": 0.6416551724137931, "grad_norm": 3.3849034309387207, "learning_rate": 9.833347544271869e-06, "loss": 1.2171, "step": 2326 }, { "epoch": 0.6419310344827586, "grad_norm": 3.8324832916259766, "learning_rate": 9.83311389023548e-06, "loss": 1.3419, "step": 2327 }, { "epoch": 0.6422068965517241, "grad_norm": 3.77484130859375, "learning_rate": 9.832880075297118e-06, "loss": 1.2391, "step": 2328 }, { "epoch": 0.6424827586206897, "grad_norm": 3.6735780239105225, "learning_rate": 9.832646099464567e-06, "loss": 1.1882, "step": 2329 }, { "epoch": 0.6427586206896552, "grad_norm": 3.6740472316741943, "learning_rate": 9.832411962745618e-06, "loss": 1.2528, "step": 2330 }, { "epoch": 0.6430344827586207, "grad_norm": 4.146139144897461, "learning_rate": 9.832177665148068e-06, "loss": 1.4199, "step": 2331 }, { "epoch": 0.6433103448275862, "grad_norm": 3.6738271713256836, "learning_rate": 9.831943206679713e-06, "loss": 1.2139, "step": 2332 }, { "epoch": 0.6435862068965518, "grad_norm": 3.823728561401367, "learning_rate": 9.83170858734836e-06, "loss": 1.1362, "step": 2333 }, { "epoch": 0.6438620689655172, "grad_norm": 3.860261917114258, "learning_rate": 9.831473807161819e-06, "loss": 1.4125, "step": 2334 }, { "epoch": 0.6441379310344828, "grad_norm": 3.4446659088134766, "learning_rate": 9.831238866127908e-06, "loss": 1.1606, "step": 2335 }, { "epoch": 0.6444137931034483, "grad_norm": 3.45593523979187, "learning_rate": 9.831003764254448e-06, "loss": 1.1952, "step": 2336 }, { "epoch": 0.6446896551724138, "grad_norm": 3.7377102375030518, "learning_rate": 9.830768501549264e-06, "loss": 1.2465, "step": 2337 }, { "epoch": 0.6449655172413793, "grad_norm": 3.793668031692505, "learning_rate": 9.83053307802019e-06, "loss": 1.2351, "step": 2338 }, { "epoch": 0.6452413793103449, "grad_norm": 3.676022529602051, "learning_rate": 9.830297493675066e-06, "loss": 1.2849, "step": 2339 }, { "epoch": 0.6455172413793103, "grad_norm": 3.765732526779175, "learning_rate": 9.830061748521729e-06, "loss": 1.323, "step": 2340 }, { "epoch": 0.6457931034482759, "grad_norm": 3.4733810424804688, "learning_rate": 9.829825842568032e-06, "loss": 1.2336, "step": 2341 }, { "epoch": 0.6460689655172414, "grad_norm": 3.7131831645965576, "learning_rate": 9.829589775821828e-06, "loss": 1.2276, "step": 2342 }, { "epoch": 0.6463448275862069, "grad_norm": 3.7520523071289062, "learning_rate": 9.829353548290977e-06, "loss": 1.3517, "step": 2343 }, { "epoch": 0.6466206896551724, "grad_norm": 3.8119959831237793, "learning_rate": 9.82911715998334e-06, "loss": 1.2576, "step": 2344 }, { "epoch": 0.646896551724138, "grad_norm": 3.6161131858825684, "learning_rate": 9.828880610906788e-06, "loss": 1.2409, "step": 2345 }, { "epoch": 0.6471724137931034, "grad_norm": 3.4409353733062744, "learning_rate": 9.828643901069198e-06, "loss": 1.1829, "step": 2346 }, { "epoch": 0.647448275862069, "grad_norm": 3.675607442855835, "learning_rate": 9.828407030478447e-06, "loss": 1.3243, "step": 2347 }, { "epoch": 0.6477241379310344, "grad_norm": 3.7395646572113037, "learning_rate": 9.828169999142425e-06, "loss": 1.3425, "step": 2348 }, { "epoch": 0.648, "grad_norm": 3.5119924545288086, "learning_rate": 9.82793280706902e-06, "loss": 1.2782, "step": 2349 }, { "epoch": 0.6482758620689655, "grad_norm": 3.659193992614746, "learning_rate": 9.82769545426613e-06, "loss": 1.279, "step": 2350 }, { "epoch": 0.648551724137931, "grad_norm": 3.7881386280059814, "learning_rate": 9.827457940741655e-06, "loss": 1.3293, "step": 2351 }, { "epoch": 0.6488275862068965, "grad_norm": 3.7873189449310303, "learning_rate": 9.827220266503507e-06, "loss": 1.3873, "step": 2352 }, { "epoch": 0.6491034482758621, "grad_norm": 3.5110533237457275, "learning_rate": 9.826982431559594e-06, "loss": 1.1783, "step": 2353 }, { "epoch": 0.6493793103448275, "grad_norm": 3.1332058906555176, "learning_rate": 9.826744435917833e-06, "loss": 1.1247, "step": 2354 }, { "epoch": 0.6496551724137931, "grad_norm": 3.8537683486938477, "learning_rate": 9.82650627958615e-06, "loss": 1.3575, "step": 2355 }, { "epoch": 0.6499310344827586, "grad_norm": 3.3641533851623535, "learning_rate": 9.826267962572474e-06, "loss": 1.3094, "step": 2356 }, { "epoch": 0.6502068965517241, "grad_norm": 3.556364059448242, "learning_rate": 9.826029484884737e-06, "loss": 1.2088, "step": 2357 }, { "epoch": 0.6504827586206896, "grad_norm": 3.471543788909912, "learning_rate": 9.82579084653088e-06, "loss": 1.179, "step": 2358 }, { "epoch": 0.6507586206896552, "grad_norm": 3.9988622665405273, "learning_rate": 9.825552047518845e-06, "loss": 1.247, "step": 2359 }, { "epoch": 0.6510344827586206, "grad_norm": 3.9567136764526367, "learning_rate": 9.825313087856584e-06, "loss": 1.2172, "step": 2360 }, { "epoch": 0.6513103448275862, "grad_norm": 4.098743438720703, "learning_rate": 9.825073967552054e-06, "loss": 1.2806, "step": 2361 }, { "epoch": 0.6515862068965518, "grad_norm": 3.5039567947387695, "learning_rate": 9.824834686613214e-06, "loss": 1.1939, "step": 2362 }, { "epoch": 0.6518620689655172, "grad_norm": 3.975294351577759, "learning_rate": 9.824595245048028e-06, "loss": 1.2606, "step": 2363 }, { "epoch": 0.6521379310344828, "grad_norm": 3.792093515396118, "learning_rate": 9.82435564286447e-06, "loss": 1.2503, "step": 2364 }, { "epoch": 0.6524137931034483, "grad_norm": 3.502873659133911, "learning_rate": 9.824115880070516e-06, "loss": 1.0154, "step": 2365 }, { "epoch": 0.6526896551724138, "grad_norm": 3.667646884918213, "learning_rate": 9.823875956674148e-06, "loss": 1.4537, "step": 2366 }, { "epoch": 0.6529655172413793, "grad_norm": 3.6530869007110596, "learning_rate": 9.823635872683354e-06, "loss": 1.2457, "step": 2367 }, { "epoch": 0.6532413793103449, "grad_norm": 3.2387630939483643, "learning_rate": 9.823395628106124e-06, "loss": 1.2861, "step": 2368 }, { "epoch": 0.6535172413793103, "grad_norm": 3.765941619873047, "learning_rate": 9.823155222950462e-06, "loss": 1.2665, "step": 2369 }, { "epoch": 0.6537931034482759, "grad_norm": 3.858210325241089, "learning_rate": 9.822914657224365e-06, "loss": 1.1284, "step": 2370 }, { "epoch": 0.6540689655172414, "grad_norm": 3.881412982940674, "learning_rate": 9.822673930935846e-06, "loss": 1.3515, "step": 2371 }, { "epoch": 0.6543448275862069, "grad_norm": 3.9063560962677, "learning_rate": 9.822433044092918e-06, "loss": 1.2445, "step": 2372 }, { "epoch": 0.6546206896551724, "grad_norm": 3.396120548248291, "learning_rate": 9.822191996703599e-06, "loss": 1.1946, "step": 2373 }, { "epoch": 0.654896551724138, "grad_norm": 3.7812726497650146, "learning_rate": 9.821950788775917e-06, "loss": 1.2046, "step": 2374 }, { "epoch": 0.6551724137931034, "grad_norm": 3.5588300228118896, "learning_rate": 9.8217094203179e-06, "loss": 1.2636, "step": 2375 }, { "epoch": 0.655448275862069, "grad_norm": 3.779311418533325, "learning_rate": 9.821467891337582e-06, "loss": 1.2051, "step": 2376 }, { "epoch": 0.6557241379310345, "grad_norm": 3.7352778911590576, "learning_rate": 9.821226201843008e-06, "loss": 1.273, "step": 2377 }, { "epoch": 0.656, "grad_norm": 3.621133804321289, "learning_rate": 9.82098435184222e-06, "loss": 1.2418, "step": 2378 }, { "epoch": 0.6562758620689655, "grad_norm": 3.7574069499969482, "learning_rate": 9.820742341343272e-06, "loss": 1.235, "step": 2379 }, { "epoch": 0.6565517241379311, "grad_norm": 3.84941029548645, "learning_rate": 9.820500170354221e-06, "loss": 1.1957, "step": 2380 }, { "epoch": 0.6568275862068965, "grad_norm": 3.4920947551727295, "learning_rate": 9.820257838883129e-06, "loss": 1.2815, "step": 2381 }, { "epoch": 0.6571034482758621, "grad_norm": 4.556122303009033, "learning_rate": 9.820015346938063e-06, "loss": 1.2715, "step": 2382 }, { "epoch": 0.6573793103448275, "grad_norm": 3.5152831077575684, "learning_rate": 9.819772694527097e-06, "loss": 1.2166, "step": 2383 }, { "epoch": 0.6576551724137931, "grad_norm": 3.405573606491089, "learning_rate": 9.819529881658308e-06, "loss": 1.2392, "step": 2384 }, { "epoch": 0.6579310344827586, "grad_norm": 3.759737968444824, "learning_rate": 9.81928690833978e-06, "loss": 1.2033, "step": 2385 }, { "epoch": 0.6582068965517242, "grad_norm": 3.8023715019226074, "learning_rate": 9.819043774579604e-06, "loss": 1.2895, "step": 2386 }, { "epoch": 0.6584827586206896, "grad_norm": 3.910212516784668, "learning_rate": 9.81880048038587e-06, "loss": 1.1678, "step": 2387 }, { "epoch": 0.6587586206896552, "grad_norm": 3.819761037826538, "learning_rate": 9.81855702576668e-06, "loss": 1.2983, "step": 2388 }, { "epoch": 0.6590344827586206, "grad_norm": 3.2969605922698975, "learning_rate": 9.81831341073014e-06, "loss": 1.1768, "step": 2389 }, { "epoch": 0.6593103448275862, "grad_norm": 3.1670544147491455, "learning_rate": 9.81806963528436e-06, "loss": 1.1563, "step": 2390 }, { "epoch": 0.6595862068965517, "grad_norm": 3.889129400253296, "learning_rate": 9.817825699437454e-06, "loss": 1.4089, "step": 2391 }, { "epoch": 0.6598620689655172, "grad_norm": 4.0903544425964355, "learning_rate": 9.817581603197544e-06, "loss": 1.3357, "step": 2392 }, { "epoch": 0.6601379310344827, "grad_norm": 4.435894966125488, "learning_rate": 9.817337346572757e-06, "loss": 1.1242, "step": 2393 }, { "epoch": 0.6604137931034483, "grad_norm": 3.9400224685668945, "learning_rate": 9.817092929571223e-06, "loss": 1.221, "step": 2394 }, { "epoch": 0.6606896551724138, "grad_norm": 3.693549633026123, "learning_rate": 9.816848352201082e-06, "loss": 1.2429, "step": 2395 }, { "epoch": 0.6609655172413793, "grad_norm": 3.6604878902435303, "learning_rate": 9.816603614470475e-06, "loss": 1.2519, "step": 2396 }, { "epoch": 0.6612413793103449, "grad_norm": 3.7245068550109863, "learning_rate": 9.816358716387546e-06, "loss": 1.3078, "step": 2397 }, { "epoch": 0.6615172413793103, "grad_norm": 3.697296142578125, "learning_rate": 9.816113657960453e-06, "loss": 1.1638, "step": 2398 }, { "epoch": 0.6617931034482759, "grad_norm": 3.6119368076324463, "learning_rate": 9.815868439197353e-06, "loss": 1.207, "step": 2399 }, { "epoch": 0.6620689655172414, "grad_norm": 3.710874319076538, "learning_rate": 9.81562306010641e-06, "loss": 1.2897, "step": 2400 }, { "epoch": 0.6623448275862069, "grad_norm": 3.590116500854492, "learning_rate": 9.81537752069579e-06, "loss": 1.1772, "step": 2401 }, { "epoch": 0.6626206896551724, "grad_norm": 3.6022684574127197, "learning_rate": 9.815131820973672e-06, "loss": 1.3522, "step": 2402 }, { "epoch": 0.662896551724138, "grad_norm": 3.5560224056243896, "learning_rate": 9.814885960948232e-06, "loss": 1.2089, "step": 2403 }, { "epoch": 0.6631724137931034, "grad_norm": 3.4723215103149414, "learning_rate": 9.81463994062766e-06, "loss": 1.2473, "step": 2404 }, { "epoch": 0.663448275862069, "grad_norm": 3.7164711952209473, "learning_rate": 9.814393760020139e-06, "loss": 1.2044, "step": 2405 }, { "epoch": 0.6637241379310345, "grad_norm": 3.692592144012451, "learning_rate": 9.814147419133871e-06, "loss": 1.2151, "step": 2406 }, { "epoch": 0.664, "grad_norm": 3.9811899662017822, "learning_rate": 9.813900917977053e-06, "loss": 1.2417, "step": 2407 }, { "epoch": 0.6642758620689655, "grad_norm": 3.7872211933135986, "learning_rate": 9.813654256557894e-06, "loss": 1.2414, "step": 2408 }, { "epoch": 0.6645517241379311, "grad_norm": 3.6556034088134766, "learning_rate": 9.813407434884605e-06, "loss": 1.2523, "step": 2409 }, { "epoch": 0.6648275862068965, "grad_norm": 3.626081705093384, "learning_rate": 9.813160452965404e-06, "loss": 1.3933, "step": 2410 }, { "epoch": 0.6651034482758621, "grad_norm": 3.986241579055786, "learning_rate": 9.812913310808512e-06, "loss": 1.2354, "step": 2411 }, { "epoch": 0.6653793103448276, "grad_norm": 3.774526596069336, "learning_rate": 9.812666008422157e-06, "loss": 1.3535, "step": 2412 }, { "epoch": 0.6656551724137931, "grad_norm": 3.600358486175537, "learning_rate": 9.812418545814572e-06, "loss": 1.2917, "step": 2413 }, { "epoch": 0.6659310344827586, "grad_norm": 3.8880155086517334, "learning_rate": 9.812170922993997e-06, "loss": 1.2989, "step": 2414 }, { "epoch": 0.6662068965517242, "grad_norm": 3.8515782356262207, "learning_rate": 9.811923139968674e-06, "loss": 1.3483, "step": 2415 }, { "epoch": 0.6664827586206896, "grad_norm": 3.6967849731445312, "learning_rate": 9.81167519674685e-06, "loss": 1.4009, "step": 2416 }, { "epoch": 0.6667586206896552, "grad_norm": 3.515038251876831, "learning_rate": 9.811427093336784e-06, "loss": 1.3382, "step": 2417 }, { "epoch": 0.6670344827586207, "grad_norm": 3.659207344055176, "learning_rate": 9.811178829746734e-06, "loss": 1.1796, "step": 2418 }, { "epoch": 0.6673103448275862, "grad_norm": 3.7421815395355225, "learning_rate": 9.810930405984967e-06, "loss": 1.2921, "step": 2419 }, { "epoch": 0.6675862068965517, "grad_norm": 3.98783802986145, "learning_rate": 9.810681822059748e-06, "loss": 1.3634, "step": 2420 }, { "epoch": 0.6678620689655173, "grad_norm": 3.527831792831421, "learning_rate": 9.810433077979356e-06, "loss": 1.2549, "step": 2421 }, { "epoch": 0.6681379310344827, "grad_norm": 3.8676466941833496, "learning_rate": 9.810184173752074e-06, "loss": 1.2281, "step": 2422 }, { "epoch": 0.6684137931034483, "grad_norm": 3.598001480102539, "learning_rate": 9.809935109386185e-06, "loss": 1.2652, "step": 2423 }, { "epoch": 0.6686896551724137, "grad_norm": 3.4421162605285645, "learning_rate": 9.809685884889983e-06, "loss": 1.2845, "step": 2424 }, { "epoch": 0.6689655172413793, "grad_norm": 3.6718404293060303, "learning_rate": 9.809436500271765e-06, "loss": 1.2823, "step": 2425 }, { "epoch": 0.6692413793103448, "grad_norm": 4.233999252319336, "learning_rate": 9.80918695553983e-06, "loss": 1.3438, "step": 2426 }, { "epoch": 0.6695172413793103, "grad_norm": 3.871509552001953, "learning_rate": 9.808937250702492e-06, "loss": 1.2762, "step": 2427 }, { "epoch": 0.6697931034482759, "grad_norm": 3.815518617630005, "learning_rate": 9.808687385768058e-06, "loss": 1.1651, "step": 2428 }, { "epoch": 0.6700689655172414, "grad_norm": 3.7234368324279785, "learning_rate": 9.808437360744851e-06, "loss": 1.3471, "step": 2429 }, { "epoch": 0.670344827586207, "grad_norm": 3.9327330589294434, "learning_rate": 9.80818717564119e-06, "loss": 1.321, "step": 2430 }, { "epoch": 0.6706206896551724, "grad_norm": 3.6065986156463623, "learning_rate": 9.80793683046541e-06, "loss": 1.3536, "step": 2431 }, { "epoch": 0.670896551724138, "grad_norm": 4.114263534545898, "learning_rate": 9.80768632522584e-06, "loss": 1.3136, "step": 2432 }, { "epoch": 0.6711724137931034, "grad_norm": 4.234928131103516, "learning_rate": 9.807435659930822e-06, "loss": 1.4108, "step": 2433 }, { "epoch": 0.671448275862069, "grad_norm": 3.819446325302124, "learning_rate": 9.807184834588701e-06, "loss": 1.3519, "step": 2434 }, { "epoch": 0.6717241379310345, "grad_norm": 3.709301233291626, "learning_rate": 9.806933849207828e-06, "loss": 1.2194, "step": 2435 }, { "epoch": 0.672, "grad_norm": 4.061258792877197, "learning_rate": 9.806682703796557e-06, "loss": 1.3062, "step": 2436 }, { "epoch": 0.6722758620689655, "grad_norm": 4.373748302459717, "learning_rate": 9.80643139836325e-06, "loss": 1.3503, "step": 2437 }, { "epoch": 0.6725517241379311, "grad_norm": 3.8994154930114746, "learning_rate": 9.806179932916273e-06, "loss": 1.2067, "step": 2438 }, { "epoch": 0.6728275862068965, "grad_norm": 4.183916091918945, "learning_rate": 9.805928307463997e-06, "loss": 1.2863, "step": 2439 }, { "epoch": 0.6731034482758621, "grad_norm": 3.9529669284820557, "learning_rate": 9.805676522014802e-06, "loss": 1.1819, "step": 2440 }, { "epoch": 0.6733793103448276, "grad_norm": 3.5027217864990234, "learning_rate": 9.805424576577067e-06, "loss": 1.2454, "step": 2441 }, { "epoch": 0.6736551724137931, "grad_norm": 3.89774489402771, "learning_rate": 9.805172471159184e-06, "loss": 1.2423, "step": 2442 }, { "epoch": 0.6739310344827586, "grad_norm": 3.678208827972412, "learning_rate": 9.804920205769539e-06, "loss": 1.2654, "step": 2443 }, { "epoch": 0.6742068965517242, "grad_norm": 3.9044764041900635, "learning_rate": 9.804667780416538e-06, "loss": 1.1979, "step": 2444 }, { "epoch": 0.6744827586206896, "grad_norm": 3.7236030101776123, "learning_rate": 9.804415195108576e-06, "loss": 1.3682, "step": 2445 }, { "epoch": 0.6747586206896552, "grad_norm": 3.4699525833129883, "learning_rate": 9.804162449854072e-06, "loss": 1.2354, "step": 2446 }, { "epoch": 0.6750344827586207, "grad_norm": 3.7407166957855225, "learning_rate": 9.803909544661432e-06, "loss": 1.4297, "step": 2447 }, { "epoch": 0.6753103448275862, "grad_norm": 3.7645161151885986, "learning_rate": 9.803656479539079e-06, "loss": 1.2914, "step": 2448 }, { "epoch": 0.6755862068965517, "grad_norm": 3.838195323944092, "learning_rate": 9.803403254495438e-06, "loss": 1.2328, "step": 2449 }, { "epoch": 0.6758620689655173, "grad_norm": 4.157896518707275, "learning_rate": 9.80314986953894e-06, "loss": 1.1537, "step": 2450 }, { "epoch": 0.6761379310344827, "grad_norm": 3.73016357421875, "learning_rate": 9.80289632467802e-06, "loss": 1.3838, "step": 2451 }, { "epoch": 0.6764137931034483, "grad_norm": 3.6705315113067627, "learning_rate": 9.802642619921117e-06, "loss": 1.2111, "step": 2452 }, { "epoch": 0.6766896551724138, "grad_norm": 3.670043706893921, "learning_rate": 9.80238875527668e-06, "loss": 1.3101, "step": 2453 }, { "epoch": 0.6769655172413793, "grad_norm": 3.5980112552642822, "learning_rate": 9.802134730753157e-06, "loss": 1.3205, "step": 2454 }, { "epoch": 0.6772413793103448, "grad_norm": 3.935845136642456, "learning_rate": 9.801880546359008e-06, "loss": 1.3672, "step": 2455 }, { "epoch": 0.6775172413793104, "grad_norm": 3.660048484802246, "learning_rate": 9.801626202102697e-06, "loss": 1.1748, "step": 2456 }, { "epoch": 0.6777931034482758, "grad_norm": 3.843552589416504, "learning_rate": 9.801371697992685e-06, "loss": 1.2336, "step": 2457 }, { "epoch": 0.6780689655172414, "grad_norm": 3.531619071960449, "learning_rate": 9.801117034037451e-06, "loss": 1.2667, "step": 2458 }, { "epoch": 0.6783448275862068, "grad_norm": 4.059255599975586, "learning_rate": 9.80086221024547e-06, "loss": 1.3067, "step": 2459 }, { "epoch": 0.6786206896551724, "grad_norm": 3.5614094734191895, "learning_rate": 9.800607226625227e-06, "loss": 1.1783, "step": 2460 }, { "epoch": 0.6788965517241379, "grad_norm": 3.5074775218963623, "learning_rate": 9.800352083185209e-06, "loss": 1.1886, "step": 2461 }, { "epoch": 0.6791724137931034, "grad_norm": 3.6954185962677, "learning_rate": 9.800096779933913e-06, "loss": 1.1363, "step": 2462 }, { "epoch": 0.679448275862069, "grad_norm": 3.7868165969848633, "learning_rate": 9.799841316879836e-06, "loss": 1.253, "step": 2463 }, { "epoch": 0.6797241379310345, "grad_norm": 3.679428815841675, "learning_rate": 9.799585694031484e-06, "loss": 1.1778, "step": 2464 }, { "epoch": 0.68, "grad_norm": 3.7877469062805176, "learning_rate": 9.799329911397365e-06, "loss": 1.2136, "step": 2465 }, { "epoch": 0.6802758620689655, "grad_norm": 3.8076374530792236, "learning_rate": 9.799073968985997e-06, "loss": 1.186, "step": 2466 }, { "epoch": 0.6805517241379311, "grad_norm": 3.8275930881500244, "learning_rate": 9.798817866805898e-06, "loss": 1.3186, "step": 2467 }, { "epoch": 0.6808275862068965, "grad_norm": 3.263319969177246, "learning_rate": 9.798561604865599e-06, "loss": 0.9824, "step": 2468 }, { "epoch": 0.6811034482758621, "grad_norm": 3.7573227882385254, "learning_rate": 9.798305183173627e-06, "loss": 1.3809, "step": 2469 }, { "epoch": 0.6813793103448276, "grad_norm": 3.7585291862487793, "learning_rate": 9.79804860173852e-06, "loss": 1.2934, "step": 2470 }, { "epoch": 0.6816551724137931, "grad_norm": 3.4934675693511963, "learning_rate": 9.797791860568817e-06, "loss": 1.2674, "step": 2471 }, { "epoch": 0.6819310344827586, "grad_norm": 3.55423641204834, "learning_rate": 9.797534959673071e-06, "loss": 1.1803, "step": 2472 }, { "epoch": 0.6822068965517242, "grad_norm": 3.7543869018554688, "learning_rate": 9.79727789905983e-06, "loss": 1.3377, "step": 2473 }, { "epoch": 0.6824827586206896, "grad_norm": 3.753429889678955, "learning_rate": 9.797020678737656e-06, "loss": 1.2735, "step": 2474 }, { "epoch": 0.6827586206896552, "grad_norm": 3.6018757820129395, "learning_rate": 9.796763298715108e-06, "loss": 1.3364, "step": 2475 }, { "epoch": 0.6830344827586207, "grad_norm": 3.4998364448547363, "learning_rate": 9.796505759000758e-06, "loss": 1.2132, "step": 2476 }, { "epoch": 0.6833103448275862, "grad_norm": 3.554687261581421, "learning_rate": 9.796248059603177e-06, "loss": 1.2963, "step": 2477 }, { "epoch": 0.6835862068965517, "grad_norm": 3.562500238418579, "learning_rate": 9.795990200530947e-06, "loss": 1.3232, "step": 2478 }, { "epoch": 0.6838620689655173, "grad_norm": 3.4793429374694824, "learning_rate": 9.79573218179265e-06, "loss": 1.3076, "step": 2479 }, { "epoch": 0.6841379310344827, "grad_norm": 3.567596197128296, "learning_rate": 9.795474003396877e-06, "loss": 1.2304, "step": 2480 }, { "epoch": 0.6844137931034483, "grad_norm": 3.4124197959899902, "learning_rate": 9.795215665352225e-06, "loss": 1.1618, "step": 2481 }, { "epoch": 0.6846896551724138, "grad_norm": 3.6309595108032227, "learning_rate": 9.794957167667291e-06, "loss": 1.201, "step": 2482 }, { "epoch": 0.6849655172413793, "grad_norm": 4.134853839874268, "learning_rate": 9.794698510350684e-06, "loss": 1.3574, "step": 2483 }, { "epoch": 0.6852413793103448, "grad_norm": 3.6518895626068115, "learning_rate": 9.794439693411014e-06, "loss": 1.3463, "step": 2484 }, { "epoch": 0.6855172413793104, "grad_norm": 3.8799736499786377, "learning_rate": 9.794180716856894e-06, "loss": 1.2225, "step": 2485 }, { "epoch": 0.6857931034482758, "grad_norm": 3.6079001426696777, "learning_rate": 9.793921580696951e-06, "loss": 1.1988, "step": 2486 }, { "epoch": 0.6860689655172414, "grad_norm": 3.632833957672119, "learning_rate": 9.793662284939808e-06, "loss": 1.0862, "step": 2487 }, { "epoch": 0.6863448275862069, "grad_norm": 3.731318950653076, "learning_rate": 9.793402829594101e-06, "loss": 1.3191, "step": 2488 }, { "epoch": 0.6866206896551724, "grad_norm": 3.1975696086883545, "learning_rate": 9.793143214668466e-06, "loss": 1.0975, "step": 2489 }, { "epoch": 0.6868965517241379, "grad_norm": 3.8954319953918457, "learning_rate": 9.792883440171546e-06, "loss": 1.2811, "step": 2490 }, { "epoch": 0.6871724137931035, "grad_norm": 4.070486545562744, "learning_rate": 9.79262350611199e-06, "loss": 1.3375, "step": 2491 }, { "epoch": 0.6874482758620689, "grad_norm": 3.7794289588928223, "learning_rate": 9.792363412498447e-06, "loss": 1.257, "step": 2492 }, { "epoch": 0.6877241379310345, "grad_norm": 3.5174717903137207, "learning_rate": 9.792103159339583e-06, "loss": 1.2879, "step": 2493 }, { "epoch": 0.688, "grad_norm": 3.733448028564453, "learning_rate": 9.791842746644057e-06, "loss": 1.2529, "step": 2494 }, { "epoch": 0.6882758620689655, "grad_norm": 3.54091739654541, "learning_rate": 9.791582174420542e-06, "loss": 1.2644, "step": 2495 }, { "epoch": 0.6885517241379311, "grad_norm": 3.5706002712249756, "learning_rate": 9.79132144267771e-06, "loss": 1.3287, "step": 2496 }, { "epoch": 0.6888275862068965, "grad_norm": 3.888655424118042, "learning_rate": 9.791060551424245e-06, "loss": 1.3219, "step": 2497 }, { "epoch": 0.6891034482758621, "grad_norm": 3.45448637008667, "learning_rate": 9.790799500668827e-06, "loss": 1.1869, "step": 2498 }, { "epoch": 0.6893793103448276, "grad_norm": 3.459752082824707, "learning_rate": 9.790538290420152e-06, "loss": 1.204, "step": 2499 }, { "epoch": 0.6896551724137931, "grad_norm": 3.311349391937256, "learning_rate": 9.790276920686912e-06, "loss": 1.167, "step": 2500 }, { "epoch": 0.6896551724137931, "eval_loss": 1.2759681940078735, "eval_runtime": 11.4648, "eval_samples_per_second": 34.889, "eval_steps_per_second": 4.361, "step": 2500 }, { "epoch": 0.6899310344827586, "grad_norm": 3.5636727809906006, "learning_rate": 9.790015391477812e-06, "loss": 1.2916, "step": 2501 }, { "epoch": 0.6902068965517242, "grad_norm": 3.6978108882904053, "learning_rate": 9.789753702801556e-06, "loss": 1.3083, "step": 2502 }, { "epoch": 0.6904827586206896, "grad_norm": 3.7050979137420654, "learning_rate": 9.789491854666858e-06, "loss": 1.4327, "step": 2503 }, { "epoch": 0.6907586206896552, "grad_norm": 3.6344714164733887, "learning_rate": 9.789229847082433e-06, "loss": 1.3309, "step": 2504 }, { "epoch": 0.6910344827586207, "grad_norm": 3.2131032943725586, "learning_rate": 9.788967680057007e-06, "loss": 1.1325, "step": 2505 }, { "epoch": 0.6913103448275862, "grad_norm": 3.6126749515533447, "learning_rate": 9.788705353599303e-06, "loss": 1.1634, "step": 2506 }, { "epoch": 0.6915862068965517, "grad_norm": 3.8888823986053467, "learning_rate": 9.78844286771806e-06, "loss": 1.2776, "step": 2507 }, { "epoch": 0.6918620689655173, "grad_norm": 3.842585802078247, "learning_rate": 9.788180222422013e-06, "loss": 1.1363, "step": 2508 }, { "epoch": 0.6921379310344827, "grad_norm": 3.789750576019287, "learning_rate": 9.787917417719907e-06, "loss": 1.3481, "step": 2509 }, { "epoch": 0.6924137931034483, "grad_norm": 3.927962064743042, "learning_rate": 9.787654453620489e-06, "loss": 1.257, "step": 2510 }, { "epoch": 0.6926896551724138, "grad_norm": 4.064880847930908, "learning_rate": 9.787391330132518e-06, "loss": 1.3148, "step": 2511 }, { "epoch": 0.6929655172413793, "grad_norm": 3.667071580886841, "learning_rate": 9.78712804726475e-06, "loss": 1.3031, "step": 2512 }, { "epoch": 0.6932413793103448, "grad_norm": 3.4818899631500244, "learning_rate": 9.786864605025952e-06, "loss": 1.2328, "step": 2513 }, { "epoch": 0.6935172413793104, "grad_norm": 3.3729069232940674, "learning_rate": 9.786601003424891e-06, "loss": 1.2426, "step": 2514 }, { "epoch": 0.6937931034482758, "grad_norm": 3.5572011470794678, "learning_rate": 9.786337242470348e-06, "loss": 1.2781, "step": 2515 }, { "epoch": 0.6940689655172414, "grad_norm": 3.8967936038970947, "learning_rate": 9.786073322171101e-06, "loss": 1.2237, "step": 2516 }, { "epoch": 0.6943448275862069, "grad_norm": 3.7102508544921875, "learning_rate": 9.785809242535936e-06, "loss": 1.3598, "step": 2517 }, { "epoch": 0.6946206896551724, "grad_norm": 3.6175789833068848, "learning_rate": 9.785545003573647e-06, "loss": 1.3945, "step": 2518 }, { "epoch": 0.6948965517241379, "grad_norm": 3.7755961418151855, "learning_rate": 9.785280605293027e-06, "loss": 1.2451, "step": 2519 }, { "epoch": 0.6951724137931035, "grad_norm": 3.5082449913024902, "learning_rate": 9.785016047702882e-06, "loss": 1.1048, "step": 2520 }, { "epoch": 0.6954482758620689, "grad_norm": 3.4236385822296143, "learning_rate": 9.784751330812017e-06, "loss": 1.2763, "step": 2521 }, { "epoch": 0.6957241379310345, "grad_norm": 3.147923231124878, "learning_rate": 9.784486454629247e-06, "loss": 1.0278, "step": 2522 }, { "epoch": 0.696, "grad_norm": 3.4799673557281494, "learning_rate": 9.78422141916339e-06, "loss": 1.1546, "step": 2523 }, { "epoch": 0.6962758620689655, "grad_norm": 3.451634168624878, "learning_rate": 9.783956224423266e-06, "loss": 1.0297, "step": 2524 }, { "epoch": 0.696551724137931, "grad_norm": 3.5772955417633057, "learning_rate": 9.783690870417706e-06, "loss": 1.0583, "step": 2525 }, { "epoch": 0.6968275862068966, "grad_norm": 3.5954458713531494, "learning_rate": 9.783425357155546e-06, "loss": 1.1665, "step": 2526 }, { "epoch": 0.697103448275862, "grad_norm": 3.6120457649230957, "learning_rate": 9.783159684645623e-06, "loss": 1.3625, "step": 2527 }, { "epoch": 0.6973793103448276, "grad_norm": 3.6384427547454834, "learning_rate": 9.782893852896782e-06, "loss": 1.1776, "step": 2528 }, { "epoch": 0.6976551724137932, "grad_norm": 4.099461078643799, "learning_rate": 9.782627861917875e-06, "loss": 1.3187, "step": 2529 }, { "epoch": 0.6979310344827586, "grad_norm": 3.7423276901245117, "learning_rate": 9.782361711717753e-06, "loss": 1.2214, "step": 2530 }, { "epoch": 0.6982068965517242, "grad_norm": 3.850670337677002, "learning_rate": 9.78209540230528e-06, "loss": 1.2591, "step": 2531 }, { "epoch": 0.6984827586206896, "grad_norm": 4.622014045715332, "learning_rate": 9.781828933689322e-06, "loss": 1.2064, "step": 2532 }, { "epoch": 0.6987586206896552, "grad_norm": 3.681126356124878, "learning_rate": 9.78156230587875e-06, "loss": 1.1502, "step": 2533 }, { "epoch": 0.6990344827586207, "grad_norm": 3.5460917949676514, "learning_rate": 9.781295518882436e-06, "loss": 1.3248, "step": 2534 }, { "epoch": 0.6993103448275862, "grad_norm": 3.553776741027832, "learning_rate": 9.781028572709267e-06, "loss": 1.2013, "step": 2535 }, { "epoch": 0.6995862068965517, "grad_norm": 3.964977264404297, "learning_rate": 9.78076146736813e-06, "loss": 1.4242, "step": 2536 }, { "epoch": 0.6998620689655173, "grad_norm": 3.528571605682373, "learning_rate": 9.780494202867914e-06, "loss": 1.2904, "step": 2537 }, { "epoch": 0.7001379310344827, "grad_norm": 3.480093002319336, "learning_rate": 9.780226779217519e-06, "loss": 1.3189, "step": 2538 }, { "epoch": 0.7004137931034483, "grad_norm": 3.744272232055664, "learning_rate": 9.779959196425845e-06, "loss": 1.143, "step": 2539 }, { "epoch": 0.7006896551724138, "grad_norm": 3.878680467605591, "learning_rate": 9.779691454501805e-06, "loss": 1.3985, "step": 2540 }, { "epoch": 0.7009655172413793, "grad_norm": 3.331270217895508, "learning_rate": 9.77942355345431e-06, "loss": 1.0887, "step": 2541 }, { "epoch": 0.7012413793103448, "grad_norm": 3.7423720359802246, "learning_rate": 9.77915549329228e-06, "loss": 1.3126, "step": 2542 }, { "epoch": 0.7015172413793104, "grad_norm": 3.7737584114074707, "learning_rate": 9.778887274024635e-06, "loss": 1.2851, "step": 2543 }, { "epoch": 0.7017931034482758, "grad_norm": 3.6910767555236816, "learning_rate": 9.77861889566031e-06, "loss": 1.1823, "step": 2544 }, { "epoch": 0.7020689655172414, "grad_norm": 3.4800195693969727, "learning_rate": 9.778350358208237e-06, "loss": 1.2344, "step": 2545 }, { "epoch": 0.7023448275862069, "grad_norm": 3.4953458309173584, "learning_rate": 9.778081661677356e-06, "loss": 1.1656, "step": 2546 }, { "epoch": 0.7026206896551724, "grad_norm": 3.827366828918457, "learning_rate": 9.777812806076613e-06, "loss": 1.3093, "step": 2547 }, { "epoch": 0.7028965517241379, "grad_norm": 4.199925899505615, "learning_rate": 9.777543791414958e-06, "loss": 1.2218, "step": 2548 }, { "epoch": 0.7031724137931035, "grad_norm": 3.7511370182037354, "learning_rate": 9.777274617701348e-06, "loss": 1.2376, "step": 2549 }, { "epoch": 0.7034482758620689, "grad_norm": 3.323103666305542, "learning_rate": 9.777005284944743e-06, "loss": 1.1391, "step": 2550 }, { "epoch": 0.7037241379310345, "grad_norm": 3.9292643070220947, "learning_rate": 9.776735793154108e-06, "loss": 1.3385, "step": 2551 }, { "epoch": 0.704, "grad_norm": 3.720283269882202, "learning_rate": 9.77646614233842e-06, "loss": 1.2974, "step": 2552 }, { "epoch": 0.7042758620689655, "grad_norm": 4.039552688598633, "learning_rate": 9.776196332506651e-06, "loss": 1.2666, "step": 2553 }, { "epoch": 0.704551724137931, "grad_norm": 3.848893880844116, "learning_rate": 9.775926363667785e-06, "loss": 1.1359, "step": 2554 }, { "epoch": 0.7048275862068966, "grad_norm": 3.3662662506103516, "learning_rate": 9.775656235830811e-06, "loss": 1.2293, "step": 2555 }, { "epoch": 0.705103448275862, "grad_norm": 3.515021800994873, "learning_rate": 9.775385949004722e-06, "loss": 1.1268, "step": 2556 }, { "epoch": 0.7053793103448276, "grad_norm": 3.6893131732940674, "learning_rate": 9.775115503198513e-06, "loss": 1.4316, "step": 2557 }, { "epoch": 0.705655172413793, "grad_norm": 3.4808242321014404, "learning_rate": 9.77484489842119e-06, "loss": 1.135, "step": 2558 }, { "epoch": 0.7059310344827586, "grad_norm": 3.8749804496765137, "learning_rate": 9.774574134681762e-06, "loss": 1.2562, "step": 2559 }, { "epoch": 0.7062068965517241, "grad_norm": 3.383525848388672, "learning_rate": 9.774303211989242e-06, "loss": 1.2517, "step": 2560 }, { "epoch": 0.7064827586206897, "grad_norm": 3.6913440227508545, "learning_rate": 9.774032130352652e-06, "loss": 1.1474, "step": 2561 }, { "epoch": 0.7067586206896552, "grad_norm": 3.7156996726989746, "learning_rate": 9.773760889781015e-06, "loss": 1.1957, "step": 2562 }, { "epoch": 0.7070344827586207, "grad_norm": 3.925180673599243, "learning_rate": 9.773489490283358e-06, "loss": 1.3119, "step": 2563 }, { "epoch": 0.7073103448275863, "grad_norm": 3.2721338272094727, "learning_rate": 9.773217931868723e-06, "loss": 1.2018, "step": 2564 }, { "epoch": 0.7075862068965517, "grad_norm": 3.5486645698547363, "learning_rate": 9.772946214546144e-06, "loss": 1.2389, "step": 2565 }, { "epoch": 0.7078620689655173, "grad_norm": 3.574848175048828, "learning_rate": 9.772674338324672e-06, "loss": 1.2109, "step": 2566 }, { "epoch": 0.7081379310344827, "grad_norm": 3.7289772033691406, "learning_rate": 9.772402303213355e-06, "loss": 1.238, "step": 2567 }, { "epoch": 0.7084137931034483, "grad_norm": 3.6885910034179688, "learning_rate": 9.772130109221249e-06, "loss": 1.366, "step": 2568 }, { "epoch": 0.7086896551724138, "grad_norm": 3.9266228675842285, "learning_rate": 9.771857756357421e-06, "loss": 1.3067, "step": 2569 }, { "epoch": 0.7089655172413794, "grad_norm": 3.3877861499786377, "learning_rate": 9.771585244630932e-06, "loss": 1.2338, "step": 2570 }, { "epoch": 0.7092413793103448, "grad_norm": 3.588575601577759, "learning_rate": 9.771312574050856e-06, "loss": 1.3356, "step": 2571 }, { "epoch": 0.7095172413793104, "grad_norm": 3.523791551589966, "learning_rate": 9.771039744626271e-06, "loss": 1.2713, "step": 2572 }, { "epoch": 0.7097931034482758, "grad_norm": 3.7132577896118164, "learning_rate": 9.770766756366261e-06, "loss": 1.2111, "step": 2573 }, { "epoch": 0.7100689655172414, "grad_norm": 4.1563639640808105, "learning_rate": 9.770493609279915e-06, "loss": 1.2836, "step": 2574 }, { "epoch": 0.7103448275862069, "grad_norm": 3.385124921798706, "learning_rate": 9.770220303376324e-06, "loss": 1.145, "step": 2575 }, { "epoch": 0.7106206896551724, "grad_norm": 3.5507915019989014, "learning_rate": 9.769946838664586e-06, "loss": 1.1218, "step": 2576 }, { "epoch": 0.7108965517241379, "grad_norm": 3.55769419670105, "learning_rate": 9.769673215153807e-06, "loss": 1.1681, "step": 2577 }, { "epoch": 0.7111724137931035, "grad_norm": 3.654956102371216, "learning_rate": 9.769399432853097e-06, "loss": 1.2821, "step": 2578 }, { "epoch": 0.7114482758620689, "grad_norm": 3.8053929805755615, "learning_rate": 9.769125491771569e-06, "loss": 1.2771, "step": 2579 }, { "epoch": 0.7117241379310345, "grad_norm": 3.570155143737793, "learning_rate": 9.768851391918346e-06, "loss": 1.3439, "step": 2580 }, { "epoch": 0.712, "grad_norm": 3.8212194442749023, "learning_rate": 9.768577133302547e-06, "loss": 1.4154, "step": 2581 }, { "epoch": 0.7122758620689655, "grad_norm": 3.6701712608337402, "learning_rate": 9.768302715933309e-06, "loss": 1.3031, "step": 2582 }, { "epoch": 0.712551724137931, "grad_norm": 3.568199634552002, "learning_rate": 9.768028139819764e-06, "loss": 1.1446, "step": 2583 }, { "epoch": 0.7128275862068966, "grad_norm": 3.5012619495391846, "learning_rate": 9.767753404971056e-06, "loss": 1.2383, "step": 2584 }, { "epoch": 0.713103448275862, "grad_norm": 3.469939947128296, "learning_rate": 9.767478511396329e-06, "loss": 1.3179, "step": 2585 }, { "epoch": 0.7133793103448276, "grad_norm": 3.4634125232696533, "learning_rate": 9.767203459104734e-06, "loss": 1.3145, "step": 2586 }, { "epoch": 0.7136551724137931, "grad_norm": 3.78296160697937, "learning_rate": 9.766928248105431e-06, "loss": 1.3234, "step": 2587 }, { "epoch": 0.7139310344827586, "grad_norm": 3.5550854206085205, "learning_rate": 9.76665287840758e-06, "loss": 1.1869, "step": 2588 }, { "epoch": 0.7142068965517241, "grad_norm": 3.4866480827331543, "learning_rate": 9.766377350020349e-06, "loss": 1.1989, "step": 2589 }, { "epoch": 0.7144827586206897, "grad_norm": 3.719688653945923, "learning_rate": 9.766101662952909e-06, "loss": 1.1841, "step": 2590 }, { "epoch": 0.7147586206896551, "grad_norm": 3.9979162216186523, "learning_rate": 9.76582581721444e-06, "loss": 1.4653, "step": 2591 }, { "epoch": 0.7150344827586207, "grad_norm": 3.7058169841766357, "learning_rate": 9.765549812814126e-06, "loss": 1.188, "step": 2592 }, { "epoch": 0.7153103448275862, "grad_norm": 3.828197479248047, "learning_rate": 9.765273649761155e-06, "loss": 1.4507, "step": 2593 }, { "epoch": 0.7155862068965517, "grad_norm": 3.7839694023132324, "learning_rate": 9.764997328064721e-06, "loss": 1.3612, "step": 2594 }, { "epoch": 0.7158620689655173, "grad_norm": 3.8582746982574463, "learning_rate": 9.764720847734022e-06, "loss": 1.1939, "step": 2595 }, { "epoch": 0.7161379310344828, "grad_norm": 3.713965654373169, "learning_rate": 9.764444208778263e-06, "loss": 1.2281, "step": 2596 }, { "epoch": 0.7164137931034483, "grad_norm": 4.151169776916504, "learning_rate": 9.764167411206652e-06, "loss": 1.3618, "step": 2597 }, { "epoch": 0.7166896551724138, "grad_norm": 3.587663173675537, "learning_rate": 9.763890455028409e-06, "loss": 1.2247, "step": 2598 }, { "epoch": 0.7169655172413794, "grad_norm": 3.336977481842041, "learning_rate": 9.76361334025275e-06, "loss": 1.2073, "step": 2599 }, { "epoch": 0.7172413793103448, "grad_norm": 4.148961067199707, "learning_rate": 9.763336066888901e-06, "loss": 1.2242, "step": 2600 }, { "epoch": 0.7175172413793104, "grad_norm": 3.776655912399292, "learning_rate": 9.763058634946097e-06, "loss": 1.1996, "step": 2601 }, { "epoch": 0.7177931034482758, "grad_norm": 3.7899723052978516, "learning_rate": 9.762781044433567e-06, "loss": 1.1839, "step": 2602 }, { "epoch": 0.7180689655172414, "grad_norm": 3.5001158714294434, "learning_rate": 9.76250329536056e-06, "loss": 1.2129, "step": 2603 }, { "epoch": 0.7183448275862069, "grad_norm": 3.332772731781006, "learning_rate": 9.762225387736314e-06, "loss": 1.1334, "step": 2604 }, { "epoch": 0.7186206896551725, "grad_norm": 3.487638235092163, "learning_rate": 9.761947321570091e-06, "loss": 1.2933, "step": 2605 }, { "epoch": 0.7188965517241379, "grad_norm": 3.5397984981536865, "learning_rate": 9.76166909687114e-06, "loss": 1.2437, "step": 2606 }, { "epoch": 0.7191724137931035, "grad_norm": 3.7372138500213623, "learning_rate": 9.761390713648728e-06, "loss": 1.292, "step": 2607 }, { "epoch": 0.7194482758620689, "grad_norm": 3.8430073261260986, "learning_rate": 9.761112171912123e-06, "loss": 1.2223, "step": 2608 }, { "epoch": 0.7197241379310345, "grad_norm": 3.5610969066619873, "learning_rate": 9.760833471670595e-06, "loss": 1.1972, "step": 2609 }, { "epoch": 0.72, "grad_norm": 3.8206262588500977, "learning_rate": 9.760554612933426e-06, "loss": 1.369, "step": 2610 }, { "epoch": 0.7202758620689655, "grad_norm": 3.4753129482269287, "learning_rate": 9.760275595709897e-06, "loss": 1.2671, "step": 2611 }, { "epoch": 0.720551724137931, "grad_norm": 3.820251226425171, "learning_rate": 9.759996420009295e-06, "loss": 1.1423, "step": 2612 }, { "epoch": 0.7208275862068966, "grad_norm": 3.537790298461914, "learning_rate": 9.759717085840918e-06, "loss": 1.2162, "step": 2613 }, { "epoch": 0.721103448275862, "grad_norm": 3.644855499267578, "learning_rate": 9.759437593214066e-06, "loss": 1.289, "step": 2614 }, { "epoch": 0.7213793103448276, "grad_norm": 3.6225810050964355, "learning_rate": 9.759157942138042e-06, "loss": 1.2729, "step": 2615 }, { "epoch": 0.7216551724137931, "grad_norm": 3.627703905105591, "learning_rate": 9.758878132622155e-06, "loss": 1.4156, "step": 2616 }, { "epoch": 0.7219310344827586, "grad_norm": 3.705207347869873, "learning_rate": 9.75859816467572e-06, "loss": 1.3126, "step": 2617 }, { "epoch": 0.7222068965517241, "grad_norm": 3.4443013668060303, "learning_rate": 9.758318038308061e-06, "loss": 1.1781, "step": 2618 }, { "epoch": 0.7224827586206897, "grad_norm": 3.499222993850708, "learning_rate": 9.758037753528502e-06, "loss": 1.4254, "step": 2619 }, { "epoch": 0.7227586206896551, "grad_norm": 3.5431923866271973, "learning_rate": 9.757757310346374e-06, "loss": 1.3296, "step": 2620 }, { "epoch": 0.7230344827586207, "grad_norm": 3.7639946937561035, "learning_rate": 9.757476708771013e-06, "loss": 1.3652, "step": 2621 }, { "epoch": 0.7233103448275862, "grad_norm": 3.806159734725952, "learning_rate": 9.75719594881176e-06, "loss": 1.283, "step": 2622 }, { "epoch": 0.7235862068965517, "grad_norm": 3.5270612239837646, "learning_rate": 9.756915030477964e-06, "loss": 1.3114, "step": 2623 }, { "epoch": 0.7238620689655172, "grad_norm": 3.935499668121338, "learning_rate": 9.756633953778976e-06, "loss": 1.3708, "step": 2624 }, { "epoch": 0.7241379310344828, "grad_norm": 3.8617026805877686, "learning_rate": 9.756352718724153e-06, "loss": 1.2932, "step": 2625 }, { "epoch": 0.7244137931034482, "grad_norm": 3.588747978210449, "learning_rate": 9.75607132532286e-06, "loss": 1.2617, "step": 2626 }, { "epoch": 0.7246896551724138, "grad_norm": 4.111470699310303, "learning_rate": 9.755789773584463e-06, "loss": 1.3595, "step": 2627 }, { "epoch": 0.7249655172413794, "grad_norm": 3.792409896850586, "learning_rate": 9.755508063518335e-06, "loss": 1.3595, "step": 2628 }, { "epoch": 0.7252413793103448, "grad_norm": 3.72040057182312, "learning_rate": 9.755226195133855e-06, "loss": 1.3992, "step": 2629 }, { "epoch": 0.7255172413793104, "grad_norm": 3.3347976207733154, "learning_rate": 9.754944168440409e-06, "loss": 1.1193, "step": 2630 }, { "epoch": 0.7257931034482759, "grad_norm": 3.9602975845336914, "learning_rate": 9.754661983447381e-06, "loss": 1.2768, "step": 2631 }, { "epoch": 0.7260689655172414, "grad_norm": 3.6338768005371094, "learning_rate": 9.754379640164171e-06, "loss": 1.1232, "step": 2632 }, { "epoch": 0.7263448275862069, "grad_norm": 3.5226058959960938, "learning_rate": 9.754097138600176e-06, "loss": 1.2078, "step": 2633 }, { "epoch": 0.7266206896551725, "grad_norm": 4.270612716674805, "learning_rate": 9.7538144787648e-06, "loss": 1.1468, "step": 2634 }, { "epoch": 0.7268965517241379, "grad_norm": 3.672724962234497, "learning_rate": 9.753531660667456e-06, "loss": 1.2211, "step": 2635 }, { "epoch": 0.7271724137931035, "grad_norm": 3.879585027694702, "learning_rate": 9.753248684317557e-06, "loss": 1.2293, "step": 2636 }, { "epoch": 0.727448275862069, "grad_norm": 3.3979525566101074, "learning_rate": 9.752965549724526e-06, "loss": 1.1393, "step": 2637 }, { "epoch": 0.7277241379310345, "grad_norm": 3.5870392322540283, "learning_rate": 9.752682256897784e-06, "loss": 1.1496, "step": 2638 }, { "epoch": 0.728, "grad_norm": 3.5198967456817627, "learning_rate": 9.75239880584677e-06, "loss": 1.2428, "step": 2639 }, { "epoch": 0.7282758620689656, "grad_norm": 4.090736389160156, "learning_rate": 9.752115196580916e-06, "loss": 1.3474, "step": 2640 }, { "epoch": 0.728551724137931, "grad_norm": 3.5317468643188477, "learning_rate": 9.751831429109661e-06, "loss": 1.3162, "step": 2641 }, { "epoch": 0.7288275862068966, "grad_norm": 3.6740128993988037, "learning_rate": 9.751547503442458e-06, "loss": 1.1475, "step": 2642 }, { "epoch": 0.729103448275862, "grad_norm": 3.962266683578491, "learning_rate": 9.751263419588756e-06, "loss": 1.3253, "step": 2643 }, { "epoch": 0.7293793103448276, "grad_norm": 3.659327745437622, "learning_rate": 9.750979177558014e-06, "loss": 1.3398, "step": 2644 }, { "epoch": 0.7296551724137931, "grad_norm": 3.731961250305176, "learning_rate": 9.750694777359693e-06, "loss": 1.2986, "step": 2645 }, { "epoch": 0.7299310344827586, "grad_norm": 3.3858273029327393, "learning_rate": 9.750410219003262e-06, "loss": 1.1812, "step": 2646 }, { "epoch": 0.7302068965517241, "grad_norm": 3.6842548847198486, "learning_rate": 9.750125502498195e-06, "loss": 1.2073, "step": 2647 }, { "epoch": 0.7304827586206897, "grad_norm": 3.970787525177002, "learning_rate": 9.749840627853969e-06, "loss": 1.2184, "step": 2648 }, { "epoch": 0.7307586206896551, "grad_norm": 3.857639789581299, "learning_rate": 9.74955559508007e-06, "loss": 1.1625, "step": 2649 }, { "epoch": 0.7310344827586207, "grad_norm": 3.65733003616333, "learning_rate": 9.749270404185987e-06, "loss": 1.132, "step": 2650 }, { "epoch": 0.7313103448275862, "grad_norm": 3.853825330734253, "learning_rate": 9.748985055181213e-06, "loss": 1.4415, "step": 2651 }, { "epoch": 0.7315862068965517, "grad_norm": 3.685270309448242, "learning_rate": 9.74869954807525e-06, "loss": 1.2505, "step": 2652 }, { "epoch": 0.7318620689655172, "grad_norm": 3.6890835762023926, "learning_rate": 9.7484138828776e-06, "loss": 1.4823, "step": 2653 }, { "epoch": 0.7321379310344828, "grad_norm": 3.7289557456970215, "learning_rate": 9.748128059597774e-06, "loss": 1.1861, "step": 2654 }, { "epoch": 0.7324137931034482, "grad_norm": 3.259798526763916, "learning_rate": 9.74784207824529e-06, "loss": 1.2211, "step": 2655 }, { "epoch": 0.7326896551724138, "grad_norm": 3.5294034481048584, "learning_rate": 9.747555938829667e-06, "loss": 1.2923, "step": 2656 }, { "epoch": 0.7329655172413793, "grad_norm": 3.6764838695526123, "learning_rate": 9.747269641360431e-06, "loss": 1.2973, "step": 2657 }, { "epoch": 0.7332413793103448, "grad_norm": 3.585174083709717, "learning_rate": 9.746983185847111e-06, "loss": 1.1632, "step": 2658 }, { "epoch": 0.7335172413793103, "grad_norm": 3.7953531742095947, "learning_rate": 9.74669657229925e-06, "loss": 1.2897, "step": 2659 }, { "epoch": 0.7337931034482759, "grad_norm": 3.5105717182159424, "learning_rate": 9.746409800726381e-06, "loss": 1.1964, "step": 2660 }, { "epoch": 0.7340689655172414, "grad_norm": 3.56064510345459, "learning_rate": 9.74612287113806e-06, "loss": 1.1824, "step": 2661 }, { "epoch": 0.7343448275862069, "grad_norm": 3.822726249694824, "learning_rate": 9.745835783543833e-06, "loss": 1.2377, "step": 2662 }, { "epoch": 0.7346206896551725, "grad_norm": 3.5679869651794434, "learning_rate": 9.74554853795326e-06, "loss": 1.1835, "step": 2663 }, { "epoch": 0.7348965517241379, "grad_norm": 4.525552272796631, "learning_rate": 9.745261134375905e-06, "loss": 1.3216, "step": 2664 }, { "epoch": 0.7351724137931035, "grad_norm": 3.5972185134887695, "learning_rate": 9.744973572821332e-06, "loss": 1.2982, "step": 2665 }, { "epoch": 0.735448275862069, "grad_norm": 3.518578052520752, "learning_rate": 9.74468585329912e-06, "loss": 1.3077, "step": 2666 }, { "epoch": 0.7357241379310345, "grad_norm": 3.8067338466644287, "learning_rate": 9.744397975818843e-06, "loss": 1.1903, "step": 2667 }, { "epoch": 0.736, "grad_norm": 3.49039363861084, "learning_rate": 9.744109940390088e-06, "loss": 1.2107, "step": 2668 }, { "epoch": 0.7362758620689656, "grad_norm": 3.840935707092285, "learning_rate": 9.743821747022441e-06, "loss": 1.3259, "step": 2669 }, { "epoch": 0.736551724137931, "grad_norm": 3.7147457599639893, "learning_rate": 9.743533395725498e-06, "loss": 1.2835, "step": 2670 }, { "epoch": 0.7368275862068966, "grad_norm": 3.3936808109283447, "learning_rate": 9.74324488650886e-06, "loss": 1.204, "step": 2671 }, { "epoch": 0.737103448275862, "grad_norm": 3.595595598220825, "learning_rate": 9.74295621938213e-06, "loss": 1.2072, "step": 2672 }, { "epoch": 0.7373793103448276, "grad_norm": 3.703843832015991, "learning_rate": 9.74266739435492e-06, "loss": 1.045, "step": 2673 }, { "epoch": 0.7376551724137931, "grad_norm": 3.6930291652679443, "learning_rate": 9.742378411436844e-06, "loss": 1.2249, "step": 2674 }, { "epoch": 0.7379310344827587, "grad_norm": 3.9289309978485107, "learning_rate": 9.742089270637522e-06, "loss": 1.3648, "step": 2675 }, { "epoch": 0.7382068965517241, "grad_norm": 3.2657458782196045, "learning_rate": 9.741799971966583e-06, "loss": 1.191, "step": 2676 }, { "epoch": 0.7384827586206897, "grad_norm": 3.8284928798675537, "learning_rate": 9.741510515433657e-06, "loss": 1.2409, "step": 2677 }, { "epoch": 0.7387586206896551, "grad_norm": 3.458599090576172, "learning_rate": 9.741220901048376e-06, "loss": 1.2287, "step": 2678 }, { "epoch": 0.7390344827586207, "grad_norm": 3.7037789821624756, "learning_rate": 9.740931128820387e-06, "loss": 1.2071, "step": 2679 }, { "epoch": 0.7393103448275862, "grad_norm": 3.8153131008148193, "learning_rate": 9.740641198759337e-06, "loss": 1.1094, "step": 2680 }, { "epoch": 0.7395862068965517, "grad_norm": 3.9229421615600586, "learning_rate": 9.740351110874875e-06, "loss": 1.2243, "step": 2681 }, { "epoch": 0.7398620689655172, "grad_norm": 3.54732608795166, "learning_rate": 9.740060865176661e-06, "loss": 1.2466, "step": 2682 }, { "epoch": 0.7401379310344828, "grad_norm": 4.1302642822265625, "learning_rate": 9.739770461674357e-06, "loss": 1.2948, "step": 2683 }, { "epoch": 0.7404137931034482, "grad_norm": 3.6472220420837402, "learning_rate": 9.73947990037763e-06, "loss": 1.1882, "step": 2684 }, { "epoch": 0.7406896551724138, "grad_norm": 3.676114559173584, "learning_rate": 9.739189181296155e-06, "loss": 1.1217, "step": 2685 }, { "epoch": 0.7409655172413793, "grad_norm": 4.021399974822998, "learning_rate": 9.738898304439608e-06, "loss": 1.2717, "step": 2686 }, { "epoch": 0.7412413793103448, "grad_norm": 3.622917413711548, "learning_rate": 9.738607269817678e-06, "loss": 1.2891, "step": 2687 }, { "epoch": 0.7415172413793103, "grad_norm": 3.629591226577759, "learning_rate": 9.738316077440047e-06, "loss": 1.3592, "step": 2688 }, { "epoch": 0.7417931034482759, "grad_norm": 3.6861300468444824, "learning_rate": 9.738024727316414e-06, "loss": 1.1862, "step": 2689 }, { "epoch": 0.7420689655172413, "grad_norm": 3.364112615585327, "learning_rate": 9.737733219456476e-06, "loss": 1.1631, "step": 2690 }, { "epoch": 0.7423448275862069, "grad_norm": 3.7490439414978027, "learning_rate": 9.737441553869938e-06, "loss": 1.3028, "step": 2691 }, { "epoch": 0.7426206896551724, "grad_norm": 3.269869327545166, "learning_rate": 9.737149730566513e-06, "loss": 1.0449, "step": 2692 }, { "epoch": 0.7428965517241379, "grad_norm": 3.442181348800659, "learning_rate": 9.736857749555912e-06, "loss": 1.2398, "step": 2693 }, { "epoch": 0.7431724137931035, "grad_norm": 3.956716775894165, "learning_rate": 9.73656561084786e-06, "loss": 1.4238, "step": 2694 }, { "epoch": 0.743448275862069, "grad_norm": 3.691483736038208, "learning_rate": 9.736273314452079e-06, "loss": 1.2802, "step": 2695 }, { "epoch": 0.7437241379310345, "grad_norm": 3.564239501953125, "learning_rate": 9.735980860378302e-06, "loss": 1.1573, "step": 2696 }, { "epoch": 0.744, "grad_norm": 3.338259220123291, "learning_rate": 9.735688248636265e-06, "loss": 1.1316, "step": 2697 }, { "epoch": 0.7442758620689656, "grad_norm": 3.781363010406494, "learning_rate": 9.735395479235708e-06, "loss": 1.4107, "step": 2698 }, { "epoch": 0.744551724137931, "grad_norm": 4.197307586669922, "learning_rate": 9.73510255218638e-06, "loss": 1.5614, "step": 2699 }, { "epoch": 0.7448275862068966, "grad_norm": 3.514054298400879, "learning_rate": 9.734809467498031e-06, "loss": 1.3612, "step": 2700 }, { "epoch": 0.7451034482758621, "grad_norm": 4.224547863006592, "learning_rate": 9.73451622518042e-06, "loss": 1.4171, "step": 2701 }, { "epoch": 0.7453793103448276, "grad_norm": 3.6191909313201904, "learning_rate": 9.734222825243309e-06, "loss": 1.255, "step": 2702 }, { "epoch": 0.7456551724137931, "grad_norm": 3.733057975769043, "learning_rate": 9.733929267696463e-06, "loss": 1.4155, "step": 2703 }, { "epoch": 0.7459310344827587, "grad_norm": 3.546945095062256, "learning_rate": 9.73363555254966e-06, "loss": 1.2762, "step": 2704 }, { "epoch": 0.7462068965517241, "grad_norm": 3.995661735534668, "learning_rate": 9.733341679812673e-06, "loss": 1.2518, "step": 2705 }, { "epoch": 0.7464827586206897, "grad_norm": 3.7007787227630615, "learning_rate": 9.73304764949529e-06, "loss": 1.2636, "step": 2706 }, { "epoch": 0.7467586206896552, "grad_norm": 3.729652166366577, "learning_rate": 9.732753461607299e-06, "loss": 1.2002, "step": 2707 }, { "epoch": 0.7470344827586207, "grad_norm": 3.628657579421997, "learning_rate": 9.73245911615849e-06, "loss": 1.3552, "step": 2708 }, { "epoch": 0.7473103448275862, "grad_norm": 3.5366618633270264, "learning_rate": 9.732164613158668e-06, "loss": 1.2614, "step": 2709 }, { "epoch": 0.7475862068965518, "grad_norm": 3.6384804248809814, "learning_rate": 9.731869952617631e-06, "loss": 1.3574, "step": 2710 }, { "epoch": 0.7478620689655172, "grad_norm": 3.432405948638916, "learning_rate": 9.731575134545195e-06, "loss": 1.2419, "step": 2711 }, { "epoch": 0.7481379310344828, "grad_norm": 3.732320547103882, "learning_rate": 9.731280158951174e-06, "loss": 1.1848, "step": 2712 }, { "epoch": 0.7484137931034482, "grad_norm": 3.4699618816375732, "learning_rate": 9.730985025845384e-06, "loss": 1.1794, "step": 2713 }, { "epoch": 0.7486896551724138, "grad_norm": 3.5224146842956543, "learning_rate": 9.73068973523765e-06, "loss": 1.3327, "step": 2714 }, { "epoch": 0.7489655172413793, "grad_norm": 3.6039934158325195, "learning_rate": 9.730394287137809e-06, "loss": 1.2766, "step": 2715 }, { "epoch": 0.7492413793103448, "grad_norm": 3.5725741386413574, "learning_rate": 9.730098681555693e-06, "loss": 1.1772, "step": 2716 }, { "epoch": 0.7495172413793103, "grad_norm": 3.810591697692871, "learning_rate": 9.729802918501144e-06, "loss": 1.1478, "step": 2717 }, { "epoch": 0.7497931034482759, "grad_norm": 3.4417061805725098, "learning_rate": 9.729506997984005e-06, "loss": 0.9952, "step": 2718 }, { "epoch": 0.7500689655172413, "grad_norm": 3.4160027503967285, "learning_rate": 9.729210920014135e-06, "loss": 1.2118, "step": 2719 }, { "epoch": 0.7503448275862069, "grad_norm": 3.7203218936920166, "learning_rate": 9.728914684601385e-06, "loss": 1.2455, "step": 2720 }, { "epoch": 0.7506206896551724, "grad_norm": 3.716222047805786, "learning_rate": 9.728618291755618e-06, "loss": 1.3794, "step": 2721 }, { "epoch": 0.7508965517241379, "grad_norm": 3.7221853733062744, "learning_rate": 9.728321741486703e-06, "loss": 1.2592, "step": 2722 }, { "epoch": 0.7511724137931034, "grad_norm": 3.7359375953674316, "learning_rate": 9.728025033804511e-06, "loss": 1.2997, "step": 2723 }, { "epoch": 0.751448275862069, "grad_norm": 3.558309555053711, "learning_rate": 9.727728168718922e-06, "loss": 1.2056, "step": 2724 }, { "epoch": 0.7517241379310344, "grad_norm": 3.899291753768921, "learning_rate": 9.727431146239816e-06, "loss": 1.3095, "step": 2725 }, { "epoch": 0.752, "grad_norm": 3.557997226715088, "learning_rate": 9.727133966377085e-06, "loss": 1.2181, "step": 2726 }, { "epoch": 0.7522758620689655, "grad_norm": 3.5894806385040283, "learning_rate": 9.72683662914062e-06, "loss": 1.1833, "step": 2727 }, { "epoch": 0.752551724137931, "grad_norm": 3.649726390838623, "learning_rate": 9.72653913454032e-06, "loss": 1.3138, "step": 2728 }, { "epoch": 0.7528275862068966, "grad_norm": 4.002497673034668, "learning_rate": 9.726241482586089e-06, "loss": 1.229, "step": 2729 }, { "epoch": 0.7531034482758621, "grad_norm": 3.710495948791504, "learning_rate": 9.725943673287839e-06, "loss": 1.2768, "step": 2730 }, { "epoch": 0.7533793103448276, "grad_norm": 3.7212939262390137, "learning_rate": 9.72564570665548e-06, "loss": 1.2322, "step": 2731 }, { "epoch": 0.7536551724137931, "grad_norm": 3.8216724395751953, "learning_rate": 9.725347582698935e-06, "loss": 1.267, "step": 2732 }, { "epoch": 0.7539310344827587, "grad_norm": 3.6020803451538086, "learning_rate": 9.72504930142813e-06, "loss": 1.248, "step": 2733 }, { "epoch": 0.7542068965517241, "grad_norm": 3.685011148452759, "learning_rate": 9.72475086285299e-06, "loss": 1.3406, "step": 2734 }, { "epoch": 0.7544827586206897, "grad_norm": 3.807864189147949, "learning_rate": 9.724452266983457e-06, "loss": 1.2185, "step": 2735 }, { "epoch": 0.7547586206896552, "grad_norm": 4.058074951171875, "learning_rate": 9.724153513829467e-06, "loss": 1.4377, "step": 2736 }, { "epoch": 0.7550344827586207, "grad_norm": 3.510266065597534, "learning_rate": 9.723854603400968e-06, "loss": 1.2781, "step": 2737 }, { "epoch": 0.7553103448275862, "grad_norm": 3.536285161972046, "learning_rate": 9.723555535707912e-06, "loss": 1.2561, "step": 2738 }, { "epoch": 0.7555862068965518, "grad_norm": 3.6470818519592285, "learning_rate": 9.723256310760253e-06, "loss": 1.2991, "step": 2739 }, { "epoch": 0.7558620689655172, "grad_norm": 3.844083547592163, "learning_rate": 9.722956928567954e-06, "loss": 1.3975, "step": 2740 }, { "epoch": 0.7561379310344828, "grad_norm": 3.94984769821167, "learning_rate": 9.722657389140981e-06, "loss": 1.4279, "step": 2741 }, { "epoch": 0.7564137931034483, "grad_norm": 3.7615573406219482, "learning_rate": 9.722357692489309e-06, "loss": 1.3408, "step": 2742 }, { "epoch": 0.7566896551724138, "grad_norm": 3.6081104278564453, "learning_rate": 9.722057838622913e-06, "loss": 1.0934, "step": 2743 }, { "epoch": 0.7569655172413793, "grad_norm": 3.5714237689971924, "learning_rate": 9.721757827551774e-06, "loss": 1.3338, "step": 2744 }, { "epoch": 0.7572413793103449, "grad_norm": 3.736457586288452, "learning_rate": 9.721457659285884e-06, "loss": 1.1929, "step": 2745 }, { "epoch": 0.7575172413793103, "grad_norm": 3.8665504455566406, "learning_rate": 9.721157333835232e-06, "loss": 1.161, "step": 2746 }, { "epoch": 0.7577931034482759, "grad_norm": 3.7174503803253174, "learning_rate": 9.72085685120982e-06, "loss": 1.2177, "step": 2747 }, { "epoch": 0.7580689655172413, "grad_norm": 3.723637580871582, "learning_rate": 9.72055621141965e-06, "loss": 1.4067, "step": 2748 }, { "epoch": 0.7583448275862069, "grad_norm": 3.3065309524536133, "learning_rate": 9.720255414474728e-06, "loss": 1.1414, "step": 2749 }, { "epoch": 0.7586206896551724, "grad_norm": 3.623000383377075, "learning_rate": 9.719954460385073e-06, "loss": 1.292, "step": 2750 }, { "epoch": 0.758896551724138, "grad_norm": 3.7696571350097656, "learning_rate": 9.7196533491607e-06, "loss": 1.2031, "step": 2751 }, { "epoch": 0.7591724137931034, "grad_norm": 4.05004358291626, "learning_rate": 9.719352080811637e-06, "loss": 1.2984, "step": 2752 }, { "epoch": 0.759448275862069, "grad_norm": 3.5935683250427246, "learning_rate": 9.71905065534791e-06, "loss": 1.1385, "step": 2753 }, { "epoch": 0.7597241379310344, "grad_norm": 3.883335590362549, "learning_rate": 9.718749072779556e-06, "loss": 1.2416, "step": 2754 }, { "epoch": 0.76, "grad_norm": 3.717848300933838, "learning_rate": 9.718447333116617e-06, "loss": 1.327, "step": 2755 }, { "epoch": 0.7602758620689655, "grad_norm": 5.785478591918945, "learning_rate": 9.718145436369136e-06, "loss": 1.443, "step": 2756 }, { "epoch": 0.760551724137931, "grad_norm": 3.4049503803253174, "learning_rate": 9.717843382547162e-06, "loss": 1.2077, "step": 2757 }, { "epoch": 0.7608275862068965, "grad_norm": 3.6904549598693848, "learning_rate": 9.717541171660754e-06, "loss": 1.1771, "step": 2758 }, { "epoch": 0.7611034482758621, "grad_norm": 3.423945903778076, "learning_rate": 9.717238803719971e-06, "loss": 1.227, "step": 2759 }, { "epoch": 0.7613793103448275, "grad_norm": 3.6661696434020996, "learning_rate": 9.71693627873488e-06, "loss": 1.2417, "step": 2760 }, { "epoch": 0.7616551724137931, "grad_norm": 3.7867119312286377, "learning_rate": 9.716633596715556e-06, "loss": 1.3328, "step": 2761 }, { "epoch": 0.7619310344827587, "grad_norm": 3.4964447021484375, "learning_rate": 9.71633075767207e-06, "loss": 1.2843, "step": 2762 }, { "epoch": 0.7622068965517241, "grad_norm": 3.9261274337768555, "learning_rate": 9.716027761614506e-06, "loss": 1.4578, "step": 2763 }, { "epoch": 0.7624827586206897, "grad_norm": 3.9000730514526367, "learning_rate": 9.715724608552954e-06, "loss": 1.1708, "step": 2764 }, { "epoch": 0.7627586206896552, "grad_norm": 3.59509539604187, "learning_rate": 9.715421298497503e-06, "loss": 1.1246, "step": 2765 }, { "epoch": 0.7630344827586207, "grad_norm": 3.5547304153442383, "learning_rate": 9.71511783145825e-06, "loss": 1.2247, "step": 2766 }, { "epoch": 0.7633103448275862, "grad_norm": 3.9858362674713135, "learning_rate": 9.714814207445301e-06, "loss": 1.3517, "step": 2767 }, { "epoch": 0.7635862068965518, "grad_norm": 3.69067120552063, "learning_rate": 9.714510426468764e-06, "loss": 1.3795, "step": 2768 }, { "epoch": 0.7638620689655172, "grad_norm": 3.4982190132141113, "learning_rate": 9.714206488538752e-06, "loss": 1.0774, "step": 2769 }, { "epoch": 0.7641379310344828, "grad_norm": 4.158191204071045, "learning_rate": 9.71390239366538e-06, "loss": 1.3797, "step": 2770 }, { "epoch": 0.7644137931034483, "grad_norm": 3.658979892730713, "learning_rate": 9.713598141858775e-06, "loss": 1.1708, "step": 2771 }, { "epoch": 0.7646896551724138, "grad_norm": 3.914468288421631, "learning_rate": 9.713293733129067e-06, "loss": 1.3182, "step": 2772 }, { "epoch": 0.7649655172413793, "grad_norm": 3.689182758331299, "learning_rate": 9.712989167486388e-06, "loss": 1.3147, "step": 2773 }, { "epoch": 0.7652413793103449, "grad_norm": 3.373826503753662, "learning_rate": 9.712684444940878e-06, "loss": 1.2152, "step": 2774 }, { "epoch": 0.7655172413793103, "grad_norm": 3.637266159057617, "learning_rate": 9.712379565502683e-06, "loss": 1.284, "step": 2775 }, { "epoch": 0.7657931034482759, "grad_norm": 3.8215866088867188, "learning_rate": 9.712074529181952e-06, "loss": 1.3111, "step": 2776 }, { "epoch": 0.7660689655172414, "grad_norm": 3.464066982269287, "learning_rate": 9.711769335988838e-06, "loss": 1.2601, "step": 2777 }, { "epoch": 0.7663448275862069, "grad_norm": 3.4209415912628174, "learning_rate": 9.711463985933505e-06, "loss": 1.1479, "step": 2778 }, { "epoch": 0.7666206896551724, "grad_norm": 3.409152030944824, "learning_rate": 9.711158479026117e-06, "loss": 1.1296, "step": 2779 }, { "epoch": 0.766896551724138, "grad_norm": 3.8502182960510254, "learning_rate": 9.710852815276846e-06, "loss": 1.3506, "step": 2780 }, { "epoch": 0.7671724137931034, "grad_norm": 3.915529251098633, "learning_rate": 9.710546994695864e-06, "loss": 1.2604, "step": 2781 }, { "epoch": 0.767448275862069, "grad_norm": 3.5100317001342773, "learning_rate": 9.710241017293358e-06, "loss": 1.2331, "step": 2782 }, { "epoch": 0.7677241379310344, "grad_norm": 3.4085733890533447, "learning_rate": 9.70993488307951e-06, "loss": 1.0795, "step": 2783 }, { "epoch": 0.768, "grad_norm": 3.6883957386016846, "learning_rate": 9.709628592064513e-06, "loss": 1.1543, "step": 2784 }, { "epoch": 0.7682758620689655, "grad_norm": 3.8755059242248535, "learning_rate": 9.709322144258565e-06, "loss": 1.2038, "step": 2785 }, { "epoch": 0.768551724137931, "grad_norm": 3.6806588172912598, "learning_rate": 9.709015539671866e-06, "loss": 1.2338, "step": 2786 }, { "epoch": 0.7688275862068965, "grad_norm": 3.4830617904663086, "learning_rate": 9.708708778314625e-06, "loss": 1.2816, "step": 2787 }, { "epoch": 0.7691034482758621, "grad_norm": 3.8489811420440674, "learning_rate": 9.708401860197057e-06, "loss": 1.3316, "step": 2788 }, { "epoch": 0.7693793103448275, "grad_norm": 3.790444850921631, "learning_rate": 9.708094785329373e-06, "loss": 1.2573, "step": 2789 }, { "epoch": 0.7696551724137931, "grad_norm": 3.4206929206848145, "learning_rate": 9.707787553721802e-06, "loss": 1.0935, "step": 2790 }, { "epoch": 0.7699310344827586, "grad_norm": 3.9757392406463623, "learning_rate": 9.70748016538457e-06, "loss": 1.3073, "step": 2791 }, { "epoch": 0.7702068965517241, "grad_norm": 3.6334447860717773, "learning_rate": 9.707172620327912e-06, "loss": 1.1641, "step": 2792 }, { "epoch": 0.7704827586206896, "grad_norm": 3.637648105621338, "learning_rate": 9.706864918562062e-06, "loss": 1.2654, "step": 2793 }, { "epoch": 0.7707586206896552, "grad_norm": 3.639908790588379, "learning_rate": 9.70655706009727e-06, "loss": 1.2502, "step": 2794 }, { "epoch": 0.7710344827586207, "grad_norm": 3.628303050994873, "learning_rate": 9.706249044943781e-06, "loss": 1.2813, "step": 2795 }, { "epoch": 0.7713103448275862, "grad_norm": 3.5415751934051514, "learning_rate": 9.705940873111852e-06, "loss": 1.4003, "step": 2796 }, { "epoch": 0.7715862068965518, "grad_norm": 4.6621785163879395, "learning_rate": 9.70563254461174e-06, "loss": 1.3655, "step": 2797 }, { "epoch": 0.7718620689655172, "grad_norm": 3.6697468757629395, "learning_rate": 9.705324059453711e-06, "loss": 1.2026, "step": 2798 }, { "epoch": 0.7721379310344828, "grad_norm": 3.5697946548461914, "learning_rate": 9.705015417648033e-06, "loss": 1.1183, "step": 2799 }, { "epoch": 0.7724137931034483, "grad_norm": 3.332533836364746, "learning_rate": 9.704706619204986e-06, "loss": 1.3379, "step": 2800 }, { "epoch": 0.7726896551724138, "grad_norm": 3.2354393005371094, "learning_rate": 9.704397664134846e-06, "loss": 1.1764, "step": 2801 }, { "epoch": 0.7729655172413793, "grad_norm": 3.8922712802886963, "learning_rate": 9.704088552447902e-06, "loss": 1.1417, "step": 2802 }, { "epoch": 0.7732413793103449, "grad_norm": 4.504889488220215, "learning_rate": 9.70377928415444e-06, "loss": 1.3543, "step": 2803 }, { "epoch": 0.7735172413793103, "grad_norm": 4.004820823669434, "learning_rate": 9.703469859264758e-06, "loss": 1.2582, "step": 2804 }, { "epoch": 0.7737931034482759, "grad_norm": 3.8830299377441406, "learning_rate": 9.70316027778916e-06, "loss": 1.1107, "step": 2805 }, { "epoch": 0.7740689655172414, "grad_norm": 3.7906627655029297, "learning_rate": 9.70285053973795e-06, "loss": 1.2729, "step": 2806 }, { "epoch": 0.7743448275862069, "grad_norm": 3.881218671798706, "learning_rate": 9.702540645121443e-06, "loss": 1.2234, "step": 2807 }, { "epoch": 0.7746206896551724, "grad_norm": 3.8226189613342285, "learning_rate": 9.70223059394995e-06, "loss": 1.381, "step": 2808 }, { "epoch": 0.774896551724138, "grad_norm": 3.6547939777374268, "learning_rate": 9.701920386233796e-06, "loss": 1.2759, "step": 2809 }, { "epoch": 0.7751724137931034, "grad_norm": 3.6194210052490234, "learning_rate": 9.70161002198331e-06, "loss": 1.294, "step": 2810 }, { "epoch": 0.775448275862069, "grad_norm": 3.554476022720337, "learning_rate": 9.701299501208823e-06, "loss": 1.3509, "step": 2811 }, { "epoch": 0.7757241379310345, "grad_norm": 3.5241947174072266, "learning_rate": 9.700988823920672e-06, "loss": 1.2209, "step": 2812 }, { "epoch": 0.776, "grad_norm": 3.8539719581604004, "learning_rate": 9.7006779901292e-06, "loss": 1.1823, "step": 2813 }, { "epoch": 0.7762758620689655, "grad_norm": 4.042088985443115, "learning_rate": 9.700366999844757e-06, "loss": 1.3265, "step": 2814 }, { "epoch": 0.776551724137931, "grad_norm": 3.453306198120117, "learning_rate": 9.700055853077695e-06, "loss": 1.1358, "step": 2815 }, { "epoch": 0.7768275862068965, "grad_norm": 3.5905749797821045, "learning_rate": 9.699744549838373e-06, "loss": 1.124, "step": 2816 }, { "epoch": 0.7771034482758621, "grad_norm": 3.5488975048065186, "learning_rate": 9.699433090137153e-06, "loss": 1.1861, "step": 2817 }, { "epoch": 0.7773793103448275, "grad_norm": 3.2438154220581055, "learning_rate": 9.699121473984405e-06, "loss": 1.1655, "step": 2818 }, { "epoch": 0.7776551724137931, "grad_norm": 3.609112024307251, "learning_rate": 9.698809701390506e-06, "loss": 1.1826, "step": 2819 }, { "epoch": 0.7779310344827586, "grad_norm": 3.696808338165283, "learning_rate": 9.698497772365831e-06, "loss": 1.1579, "step": 2820 }, { "epoch": 0.7782068965517241, "grad_norm": 3.8019981384277344, "learning_rate": 9.69818568692077e-06, "loss": 1.247, "step": 2821 }, { "epoch": 0.7784827586206896, "grad_norm": 4.013712406158447, "learning_rate": 9.697873445065705e-06, "loss": 1.2277, "step": 2822 }, { "epoch": 0.7787586206896552, "grad_norm": 3.4745988845825195, "learning_rate": 9.697561046811039e-06, "loss": 1.1208, "step": 2823 }, { "epoch": 0.7790344827586206, "grad_norm": 4.246816158294678, "learning_rate": 9.697248492167166e-06, "loss": 1.22, "step": 2824 }, { "epoch": 0.7793103448275862, "grad_norm": 3.497236967086792, "learning_rate": 9.696935781144497e-06, "loss": 1.1989, "step": 2825 }, { "epoch": 0.7795862068965517, "grad_norm": 3.6110482215881348, "learning_rate": 9.696622913753438e-06, "loss": 1.0521, "step": 2826 }, { "epoch": 0.7798620689655172, "grad_norm": 3.818458318710327, "learning_rate": 9.696309890004408e-06, "loss": 1.3965, "step": 2827 }, { "epoch": 0.7801379310344828, "grad_norm": 4.817444801330566, "learning_rate": 9.695996709907824e-06, "loss": 1.32, "step": 2828 }, { "epoch": 0.7804137931034483, "grad_norm": 4.28647518157959, "learning_rate": 9.695683373474117e-06, "loss": 1.2888, "step": 2829 }, { "epoch": 0.7806896551724138, "grad_norm": 3.6070375442504883, "learning_rate": 9.695369880713717e-06, "loss": 1.2501, "step": 2830 }, { "epoch": 0.7809655172413793, "grad_norm": 3.7868545055389404, "learning_rate": 9.69505623163706e-06, "loss": 1.22, "step": 2831 }, { "epoch": 0.7812413793103449, "grad_norm": 3.305525302886963, "learning_rate": 9.694742426254586e-06, "loss": 1.0215, "step": 2832 }, { "epoch": 0.7815172413793103, "grad_norm": 3.5895895957946777, "learning_rate": 9.694428464576746e-06, "loss": 1.3972, "step": 2833 }, { "epoch": 0.7817931034482759, "grad_norm": 3.6353683471679688, "learning_rate": 9.694114346613989e-06, "loss": 1.2244, "step": 2834 }, { "epoch": 0.7820689655172414, "grad_norm": 3.532087564468384, "learning_rate": 9.693800072376775e-06, "loss": 1.2458, "step": 2835 }, { "epoch": 0.7823448275862069, "grad_norm": 3.6722490787506104, "learning_rate": 9.693485641875564e-06, "loss": 1.0301, "step": 2836 }, { "epoch": 0.7826206896551724, "grad_norm": 3.6219539642333984, "learning_rate": 9.693171055120826e-06, "loss": 1.2031, "step": 2837 }, { "epoch": 0.782896551724138, "grad_norm": 3.643819808959961, "learning_rate": 9.692856312123037e-06, "loss": 1.2621, "step": 2838 }, { "epoch": 0.7831724137931034, "grad_norm": 3.4984006881713867, "learning_rate": 9.692541412892666e-06, "loss": 1.3241, "step": 2839 }, { "epoch": 0.783448275862069, "grad_norm": 3.4514896869659424, "learning_rate": 9.692226357440205e-06, "loss": 1.258, "step": 2840 }, { "epoch": 0.7837241379310345, "grad_norm": 3.603003740310669, "learning_rate": 9.69191114577614e-06, "loss": 1.3914, "step": 2841 }, { "epoch": 0.784, "grad_norm": 3.4147398471832275, "learning_rate": 9.691595777910964e-06, "loss": 1.1953, "step": 2842 }, { "epoch": 0.7842758620689655, "grad_norm": 3.6042134761810303, "learning_rate": 9.691280253855178e-06, "loss": 1.3289, "step": 2843 }, { "epoch": 0.7845517241379311, "grad_norm": 3.656338930130005, "learning_rate": 9.690964573619286e-06, "loss": 1.2699, "step": 2844 }, { "epoch": 0.7848275862068965, "grad_norm": 3.7196571826934814, "learning_rate": 9.690648737213795e-06, "loss": 1.3167, "step": 2845 }, { "epoch": 0.7851034482758621, "grad_norm": 3.9806313514709473, "learning_rate": 9.690332744649222e-06, "loss": 1.225, "step": 2846 }, { "epoch": 0.7853793103448276, "grad_norm": 3.6289708614349365, "learning_rate": 9.690016595936087e-06, "loss": 1.1677, "step": 2847 }, { "epoch": 0.7856551724137931, "grad_norm": 3.5435914993286133, "learning_rate": 9.689700291084913e-06, "loss": 1.1673, "step": 2848 }, { "epoch": 0.7859310344827586, "grad_norm": 3.571784019470215, "learning_rate": 9.689383830106233e-06, "loss": 1.1912, "step": 2849 }, { "epoch": 0.7862068965517242, "grad_norm": 3.8022091388702393, "learning_rate": 9.68906721301058e-06, "loss": 1.3099, "step": 2850 }, { "epoch": 0.7864827586206896, "grad_norm": 3.580030918121338, "learning_rate": 9.688750439808496e-06, "loss": 1.2823, "step": 2851 }, { "epoch": 0.7867586206896552, "grad_norm": 3.9629323482513428, "learning_rate": 9.688433510510527e-06, "loss": 1.2594, "step": 2852 }, { "epoch": 0.7870344827586206, "grad_norm": 3.4662692546844482, "learning_rate": 9.688116425127224e-06, "loss": 1.1894, "step": 2853 }, { "epoch": 0.7873103448275862, "grad_norm": 3.7550694942474365, "learning_rate": 9.687799183669143e-06, "loss": 1.1678, "step": 2854 }, { "epoch": 0.7875862068965517, "grad_norm": 3.724625825881958, "learning_rate": 9.687481786146846e-06, "loss": 1.3844, "step": 2855 }, { "epoch": 0.7878620689655172, "grad_norm": 3.57576847076416, "learning_rate": 9.687164232570898e-06, "loss": 1.3044, "step": 2856 }, { "epoch": 0.7881379310344827, "grad_norm": 3.4647228717803955, "learning_rate": 9.686846522951875e-06, "loss": 1.1913, "step": 2857 }, { "epoch": 0.7884137931034483, "grad_norm": 3.937211513519287, "learning_rate": 9.686528657300346e-06, "loss": 1.1543, "step": 2858 }, { "epoch": 0.7886896551724137, "grad_norm": 3.9603114128112793, "learning_rate": 9.686210635626903e-06, "loss": 1.3497, "step": 2859 }, { "epoch": 0.7889655172413793, "grad_norm": 3.7314999103546143, "learning_rate": 9.685892457942127e-06, "loss": 1.3653, "step": 2860 }, { "epoch": 0.7892413793103449, "grad_norm": 3.3805649280548096, "learning_rate": 9.685574124256611e-06, "loss": 1.1911, "step": 2861 }, { "epoch": 0.7895172413793103, "grad_norm": 3.8993446826934814, "learning_rate": 9.685255634580957e-06, "loss": 1.2378, "step": 2862 }, { "epoch": 0.7897931034482759, "grad_norm": 4.197751998901367, "learning_rate": 9.684936988925763e-06, "loss": 1.4268, "step": 2863 }, { "epoch": 0.7900689655172414, "grad_norm": 3.7246782779693604, "learning_rate": 9.684618187301642e-06, "loss": 1.332, "step": 2864 }, { "epoch": 0.7903448275862069, "grad_norm": 3.289708375930786, "learning_rate": 9.684299229719203e-06, "loss": 1.1469, "step": 2865 }, { "epoch": 0.7906206896551724, "grad_norm": 3.530055522918701, "learning_rate": 9.683980116189067e-06, "loss": 1.3151, "step": 2866 }, { "epoch": 0.790896551724138, "grad_norm": 3.903390407562256, "learning_rate": 9.683660846721856e-06, "loss": 1.2828, "step": 2867 }, { "epoch": 0.7911724137931034, "grad_norm": 3.3766567707061768, "learning_rate": 9.683341421328203e-06, "loss": 1.1844, "step": 2868 }, { "epoch": 0.791448275862069, "grad_norm": 3.611060380935669, "learning_rate": 9.683021840018738e-06, "loss": 1.2383, "step": 2869 }, { "epoch": 0.7917241379310345, "grad_norm": 3.8821980953216553, "learning_rate": 9.682702102804101e-06, "loss": 1.3058, "step": 2870 }, { "epoch": 0.792, "grad_norm": 3.7734873294830322, "learning_rate": 9.682382209694939e-06, "loss": 1.2026, "step": 2871 }, { "epoch": 0.7922758620689655, "grad_norm": 3.544613838195801, "learning_rate": 9.682062160701899e-06, "loss": 1.2628, "step": 2872 }, { "epoch": 0.7925517241379311, "grad_norm": 3.310898542404175, "learning_rate": 9.681741955835637e-06, "loss": 1.3003, "step": 2873 }, { "epoch": 0.7928275862068965, "grad_norm": 3.501479387283325, "learning_rate": 9.681421595106815e-06, "loss": 1.2413, "step": 2874 }, { "epoch": 0.7931034482758621, "grad_norm": 3.928487777709961, "learning_rate": 9.681101078526094e-06, "loss": 1.2665, "step": 2875 }, { "epoch": 0.7933793103448276, "grad_norm": 3.620034694671631, "learning_rate": 9.680780406104149e-06, "loss": 1.2196, "step": 2876 }, { "epoch": 0.7936551724137931, "grad_norm": 3.774920701980591, "learning_rate": 9.680459577851651e-06, "loss": 1.3543, "step": 2877 }, { "epoch": 0.7939310344827586, "grad_norm": 4.665200710296631, "learning_rate": 9.680138593779288e-06, "loss": 1.1836, "step": 2878 }, { "epoch": 0.7942068965517242, "grad_norm": 4.105149269104004, "learning_rate": 9.679817453897738e-06, "loss": 1.2501, "step": 2879 }, { "epoch": 0.7944827586206896, "grad_norm": 3.7284653186798096, "learning_rate": 9.679496158217696e-06, "loss": 1.0949, "step": 2880 }, { "epoch": 0.7947586206896552, "grad_norm": 4.012763023376465, "learning_rate": 9.67917470674986e-06, "loss": 1.2168, "step": 2881 }, { "epoch": 0.7950344827586207, "grad_norm": 4.2312517166137695, "learning_rate": 9.678853099504929e-06, "loss": 1.4161, "step": 2882 }, { "epoch": 0.7953103448275862, "grad_norm": 3.506248950958252, "learning_rate": 9.67853133649361e-06, "loss": 1.239, "step": 2883 }, { "epoch": 0.7955862068965517, "grad_norm": 3.362842321395874, "learning_rate": 9.678209417726618e-06, "loss": 1.1614, "step": 2884 }, { "epoch": 0.7958620689655173, "grad_norm": 4.098961353302002, "learning_rate": 9.677887343214667e-06, "loss": 1.3163, "step": 2885 }, { "epoch": 0.7961379310344827, "grad_norm": 3.5676488876342773, "learning_rate": 9.677565112968479e-06, "loss": 1.1607, "step": 2886 }, { "epoch": 0.7964137931034483, "grad_norm": 3.631824493408203, "learning_rate": 9.677242726998783e-06, "loss": 1.4508, "step": 2887 }, { "epoch": 0.7966896551724137, "grad_norm": 3.8333280086517334, "learning_rate": 9.676920185316315e-06, "loss": 1.2914, "step": 2888 }, { "epoch": 0.7969655172413793, "grad_norm": 3.617638111114502, "learning_rate": 9.676597487931806e-06, "loss": 1.1103, "step": 2889 }, { "epoch": 0.7972413793103448, "grad_norm": 4.421525955200195, "learning_rate": 9.676274634856003e-06, "loss": 1.1291, "step": 2890 }, { "epoch": 0.7975172413793103, "grad_norm": 3.619903564453125, "learning_rate": 9.675951626099656e-06, "loss": 1.1547, "step": 2891 }, { "epoch": 0.7977931034482758, "grad_norm": 3.550098180770874, "learning_rate": 9.675628461673515e-06, "loss": 1.31, "step": 2892 }, { "epoch": 0.7980689655172414, "grad_norm": 3.741053581237793, "learning_rate": 9.675305141588337e-06, "loss": 1.2751, "step": 2893 }, { "epoch": 0.798344827586207, "grad_norm": 4.173596382141113, "learning_rate": 9.674981665854893e-06, "loss": 1.4595, "step": 2894 }, { "epoch": 0.7986206896551724, "grad_norm": 3.526067018508911, "learning_rate": 9.674658034483947e-06, "loss": 1.1922, "step": 2895 }, { "epoch": 0.798896551724138, "grad_norm": 3.8193821907043457, "learning_rate": 9.674334247486274e-06, "loss": 1.1981, "step": 2896 }, { "epoch": 0.7991724137931034, "grad_norm": 3.8108086585998535, "learning_rate": 9.674010304872653e-06, "loss": 1.2581, "step": 2897 }, { "epoch": 0.799448275862069, "grad_norm": 3.2421987056732178, "learning_rate": 9.67368620665387e-06, "loss": 1.3691, "step": 2898 }, { "epoch": 0.7997241379310345, "grad_norm": 3.453404188156128, "learning_rate": 9.673361952840711e-06, "loss": 1.2022, "step": 2899 }, { "epoch": 0.8, "grad_norm": 3.276597023010254, "learning_rate": 9.673037543443977e-06, "loss": 1.2246, "step": 2900 }, { "epoch": 0.8002758620689655, "grad_norm": 3.396411418914795, "learning_rate": 9.672712978474464e-06, "loss": 1.0289, "step": 2901 }, { "epoch": 0.8005517241379311, "grad_norm": 3.5374865531921387, "learning_rate": 9.672388257942977e-06, "loss": 1.3248, "step": 2902 }, { "epoch": 0.8008275862068965, "grad_norm": 3.484429121017456, "learning_rate": 9.672063381860329e-06, "loss": 1.2297, "step": 2903 }, { "epoch": 0.8011034482758621, "grad_norm": 3.4648001194000244, "learning_rate": 9.671738350237332e-06, "loss": 1.2147, "step": 2904 }, { "epoch": 0.8013793103448276, "grad_norm": 3.757676839828491, "learning_rate": 9.67141316308481e-06, "loss": 1.3488, "step": 2905 }, { "epoch": 0.8016551724137931, "grad_norm": 3.7045352458953857, "learning_rate": 9.671087820413589e-06, "loss": 1.2232, "step": 2906 }, { "epoch": 0.8019310344827586, "grad_norm": 3.808595895767212, "learning_rate": 9.670762322234498e-06, "loss": 1.3133, "step": 2907 }, { "epoch": 0.8022068965517242, "grad_norm": 3.812049150466919, "learning_rate": 9.670436668558375e-06, "loss": 1.2292, "step": 2908 }, { "epoch": 0.8024827586206896, "grad_norm": 3.7383017539978027, "learning_rate": 9.67011085939606e-06, "loss": 1.1597, "step": 2909 }, { "epoch": 0.8027586206896552, "grad_norm": 3.533710241317749, "learning_rate": 9.669784894758403e-06, "loss": 1.3406, "step": 2910 }, { "epoch": 0.8030344827586207, "grad_norm": 3.423973321914673, "learning_rate": 9.66945877465625e-06, "loss": 1.1323, "step": 2911 }, { "epoch": 0.8033103448275862, "grad_norm": 3.922379493713379, "learning_rate": 9.669132499100465e-06, "loss": 1.3072, "step": 2912 }, { "epoch": 0.8035862068965517, "grad_norm": 3.6543960571289062, "learning_rate": 9.668806068101906e-06, "loss": 1.2287, "step": 2913 }, { "epoch": 0.8038620689655173, "grad_norm": 4.285608768463135, "learning_rate": 9.66847948167144e-06, "loss": 1.2276, "step": 2914 }, { "epoch": 0.8041379310344827, "grad_norm": 3.6123909950256348, "learning_rate": 9.668152739819942e-06, "loss": 1.2461, "step": 2915 }, { "epoch": 0.8044137931034483, "grad_norm": 3.8273470401763916, "learning_rate": 9.667825842558288e-06, "loss": 1.3768, "step": 2916 }, { "epoch": 0.8046896551724138, "grad_norm": 3.6442058086395264, "learning_rate": 9.667498789897363e-06, "loss": 1.452, "step": 2917 }, { "epoch": 0.8049655172413793, "grad_norm": 3.778012752532959, "learning_rate": 9.667171581848052e-06, "loss": 1.3398, "step": 2918 }, { "epoch": 0.8052413793103448, "grad_norm": 3.490568161010742, "learning_rate": 9.66684421842125e-06, "loss": 1.163, "step": 2919 }, { "epoch": 0.8055172413793104, "grad_norm": 3.732060432434082, "learning_rate": 9.666516699627854e-06, "loss": 1.3191, "step": 2920 }, { "epoch": 0.8057931034482758, "grad_norm": 3.858429431915283, "learning_rate": 9.666189025478772e-06, "loss": 1.3046, "step": 2921 }, { "epoch": 0.8060689655172414, "grad_norm": 3.337714433670044, "learning_rate": 9.66586119598491e-06, "loss": 1.2481, "step": 2922 }, { "epoch": 0.8063448275862068, "grad_norm": 3.7136707305908203, "learning_rate": 9.665533211157178e-06, "loss": 1.343, "step": 2923 }, { "epoch": 0.8066206896551724, "grad_norm": 3.4942708015441895, "learning_rate": 9.6652050710065e-06, "loss": 1.0877, "step": 2924 }, { "epoch": 0.8068965517241379, "grad_norm": 3.5066301822662354, "learning_rate": 9.6648767755438e-06, "loss": 1.1737, "step": 2925 }, { "epoch": 0.8071724137931034, "grad_norm": 3.0878138542175293, "learning_rate": 9.664548324780008e-06, "loss": 1.1509, "step": 2926 }, { "epoch": 0.807448275862069, "grad_norm": 3.3705852031707764, "learning_rate": 9.664219718726055e-06, "loss": 1.2242, "step": 2927 }, { "epoch": 0.8077241379310345, "grad_norm": 3.684183120727539, "learning_rate": 9.663890957392884e-06, "loss": 1.3412, "step": 2928 }, { "epoch": 0.808, "grad_norm": 3.745936393737793, "learning_rate": 9.66356204079144e-06, "loss": 1.3616, "step": 2929 }, { "epoch": 0.8082758620689655, "grad_norm": 3.423750400543213, "learning_rate": 9.663232968932669e-06, "loss": 1.3138, "step": 2930 }, { "epoch": 0.8085517241379311, "grad_norm": 3.5936238765716553, "learning_rate": 9.662903741827532e-06, "loss": 1.2977, "step": 2931 }, { "epoch": 0.8088275862068965, "grad_norm": 3.9920191764831543, "learning_rate": 9.662574359486987e-06, "loss": 1.3435, "step": 2932 }, { "epoch": 0.8091034482758621, "grad_norm": 3.9612772464752197, "learning_rate": 9.662244821922e-06, "loss": 1.4108, "step": 2933 }, { "epoch": 0.8093793103448276, "grad_norm": 3.59700083732605, "learning_rate": 9.66191512914354e-06, "loss": 1.1252, "step": 2934 }, { "epoch": 0.8096551724137931, "grad_norm": 3.4425954818725586, "learning_rate": 9.661585281162585e-06, "loss": 1.2032, "step": 2935 }, { "epoch": 0.8099310344827586, "grad_norm": 3.8960626125335693, "learning_rate": 9.661255277990115e-06, "loss": 1.4197, "step": 2936 }, { "epoch": 0.8102068965517242, "grad_norm": 3.1638870239257812, "learning_rate": 9.660925119637118e-06, "loss": 1.1988, "step": 2937 }, { "epoch": 0.8104827586206896, "grad_norm": 3.5093040466308594, "learning_rate": 9.660594806114585e-06, "loss": 1.1885, "step": 2938 }, { "epoch": 0.8107586206896552, "grad_norm": 3.715731143951416, "learning_rate": 9.66026433743351e-06, "loss": 1.2734, "step": 2939 }, { "epoch": 0.8110344827586207, "grad_norm": 3.6320536136627197, "learning_rate": 9.6599337136049e-06, "loss": 1.2578, "step": 2940 }, { "epoch": 0.8113103448275862, "grad_norm": 3.4825620651245117, "learning_rate": 9.659602934639756e-06, "loss": 1.1337, "step": 2941 }, { "epoch": 0.8115862068965517, "grad_norm": 3.3275721073150635, "learning_rate": 9.659272000549093e-06, "loss": 1.1845, "step": 2942 }, { "epoch": 0.8118620689655173, "grad_norm": 3.148757219314575, "learning_rate": 9.65894091134393e-06, "loss": 1.2039, "step": 2943 }, { "epoch": 0.8121379310344827, "grad_norm": 3.742133140563965, "learning_rate": 9.658609667035289e-06, "loss": 1.2437, "step": 2944 }, { "epoch": 0.8124137931034483, "grad_norm": 3.502598285675049, "learning_rate": 9.658278267634193e-06, "loss": 1.2981, "step": 2945 }, { "epoch": 0.8126896551724138, "grad_norm": 3.5892910957336426, "learning_rate": 9.65794671315168e-06, "loss": 1.2855, "step": 2946 }, { "epoch": 0.8129655172413793, "grad_norm": 3.479294776916504, "learning_rate": 9.657615003598789e-06, "loss": 1.2707, "step": 2947 }, { "epoch": 0.8132413793103448, "grad_norm": 3.6230926513671875, "learning_rate": 9.657283138986557e-06, "loss": 1.1741, "step": 2948 }, { "epoch": 0.8135172413793104, "grad_norm": 3.5600287914276123, "learning_rate": 9.656951119326036e-06, "loss": 1.1797, "step": 2949 }, { "epoch": 0.8137931034482758, "grad_norm": 3.502013921737671, "learning_rate": 9.65661894462828e-06, "loss": 1.309, "step": 2950 }, { "epoch": 0.8140689655172414, "grad_norm": 3.9161126613616943, "learning_rate": 9.656286614904347e-06, "loss": 1.3048, "step": 2951 }, { "epoch": 0.8143448275862069, "grad_norm": 4.115235805511475, "learning_rate": 9.6559541301653e-06, "loss": 1.3704, "step": 2952 }, { "epoch": 0.8146206896551724, "grad_norm": 3.528857707977295, "learning_rate": 9.655621490422209e-06, "loss": 1.2093, "step": 2953 }, { "epoch": 0.8148965517241379, "grad_norm": 3.554860830307007, "learning_rate": 9.655288695686147e-06, "loss": 1.1998, "step": 2954 }, { "epoch": 0.8151724137931035, "grad_norm": 3.957257032394409, "learning_rate": 9.654955745968195e-06, "loss": 1.277, "step": 2955 }, { "epoch": 0.8154482758620689, "grad_norm": 3.4143033027648926, "learning_rate": 9.654622641279435e-06, "loss": 1.1721, "step": 2956 }, { "epoch": 0.8157241379310345, "grad_norm": 3.362894058227539, "learning_rate": 9.654289381630959e-06, "loss": 1.1669, "step": 2957 }, { "epoch": 0.816, "grad_norm": 4.006445407867432, "learning_rate": 9.65395596703386e-06, "loss": 1.1991, "step": 2958 }, { "epoch": 0.8162758620689655, "grad_norm": 3.7989776134490967, "learning_rate": 9.653622397499239e-06, "loss": 1.1453, "step": 2959 }, { "epoch": 0.8165517241379311, "grad_norm": 3.339053153991699, "learning_rate": 9.653288673038199e-06, "loss": 1.2111, "step": 2960 }, { "epoch": 0.8168275862068965, "grad_norm": 3.522122621536255, "learning_rate": 9.652954793661853e-06, "loss": 1.2984, "step": 2961 }, { "epoch": 0.8171034482758621, "grad_norm": 3.8125030994415283, "learning_rate": 9.652620759381315e-06, "loss": 1.3741, "step": 2962 }, { "epoch": 0.8173793103448276, "grad_norm": 3.4254310131073, "learning_rate": 9.652286570207705e-06, "loss": 1.1343, "step": 2963 }, { "epoch": 0.8176551724137932, "grad_norm": 4.166012763977051, "learning_rate": 9.651952226152151e-06, "loss": 1.1713, "step": 2964 }, { "epoch": 0.8179310344827586, "grad_norm": 3.85870623588562, "learning_rate": 9.65161772722578e-06, "loss": 1.3114, "step": 2965 }, { "epoch": 0.8182068965517242, "grad_norm": 3.635528802871704, "learning_rate": 9.651283073439732e-06, "loss": 1.276, "step": 2966 }, { "epoch": 0.8184827586206896, "grad_norm": 3.3238353729248047, "learning_rate": 9.650948264805146e-06, "loss": 1.1857, "step": 2967 }, { "epoch": 0.8187586206896552, "grad_norm": 3.6863653659820557, "learning_rate": 9.650613301333168e-06, "loss": 1.3281, "step": 2968 }, { "epoch": 0.8190344827586207, "grad_norm": 3.5318939685821533, "learning_rate": 9.650278183034951e-06, "loss": 1.3246, "step": 2969 }, { "epoch": 0.8193103448275862, "grad_norm": 3.649792432785034, "learning_rate": 9.649942909921651e-06, "loss": 1.2973, "step": 2970 }, { "epoch": 0.8195862068965517, "grad_norm": 3.351839303970337, "learning_rate": 9.649607482004428e-06, "loss": 1.197, "step": 2971 }, { "epoch": 0.8198620689655173, "grad_norm": 3.4857914447784424, "learning_rate": 9.64927189929445e-06, "loss": 1.308, "step": 2972 }, { "epoch": 0.8201379310344827, "grad_norm": 3.6390528678894043, "learning_rate": 9.648936161802892e-06, "loss": 1.2609, "step": 2973 }, { "epoch": 0.8204137931034483, "grad_norm": 3.436347007751465, "learning_rate": 9.648600269540927e-06, "loss": 1.3407, "step": 2974 }, { "epoch": 0.8206896551724138, "grad_norm": 3.4241509437561035, "learning_rate": 9.648264222519738e-06, "loss": 1.2185, "step": 2975 }, { "epoch": 0.8209655172413793, "grad_norm": 3.8195157051086426, "learning_rate": 9.647928020750516e-06, "loss": 1.3263, "step": 2976 }, { "epoch": 0.8212413793103448, "grad_norm": 4.152968883514404, "learning_rate": 9.647591664244451e-06, "loss": 1.299, "step": 2977 }, { "epoch": 0.8215172413793104, "grad_norm": 3.433778762817383, "learning_rate": 9.64725515301274e-06, "loss": 1.1281, "step": 2978 }, { "epoch": 0.8217931034482758, "grad_norm": 3.25472354888916, "learning_rate": 9.646918487066588e-06, "loss": 1.2424, "step": 2979 }, { "epoch": 0.8220689655172414, "grad_norm": 3.5632805824279785, "learning_rate": 9.646581666417203e-06, "loss": 1.2752, "step": 2980 }, { "epoch": 0.8223448275862069, "grad_norm": 3.8935415744781494, "learning_rate": 9.646244691075797e-06, "loss": 1.2029, "step": 2981 }, { "epoch": 0.8226206896551724, "grad_norm": 3.749163866043091, "learning_rate": 9.645907561053588e-06, "loss": 1.2719, "step": 2982 }, { "epoch": 0.8228965517241379, "grad_norm": 3.6032066345214844, "learning_rate": 9.645570276361801e-06, "loss": 1.0338, "step": 2983 }, { "epoch": 0.8231724137931035, "grad_norm": 4.004724025726318, "learning_rate": 9.645232837011664e-06, "loss": 1.2737, "step": 2984 }, { "epoch": 0.8234482758620689, "grad_norm": 3.808551788330078, "learning_rate": 9.644895243014414e-06, "loss": 1.1507, "step": 2985 }, { "epoch": 0.8237241379310345, "grad_norm": 4.038171768188477, "learning_rate": 9.644557494381284e-06, "loss": 1.3231, "step": 2986 }, { "epoch": 0.824, "grad_norm": 3.838376998901367, "learning_rate": 9.644219591123523e-06, "loss": 1.3329, "step": 2987 }, { "epoch": 0.8242758620689655, "grad_norm": 3.7274179458618164, "learning_rate": 9.643881533252378e-06, "loss": 1.3396, "step": 2988 }, { "epoch": 0.824551724137931, "grad_norm": 3.4629313945770264, "learning_rate": 9.643543320779105e-06, "loss": 1.336, "step": 2989 }, { "epoch": 0.8248275862068966, "grad_norm": 4.23643159866333, "learning_rate": 9.643204953714963e-06, "loss": 1.2282, "step": 2990 }, { "epoch": 0.825103448275862, "grad_norm": 3.410979747772217, "learning_rate": 9.642866432071216e-06, "loss": 1.1773, "step": 2991 }, { "epoch": 0.8253793103448276, "grad_norm": 3.6902763843536377, "learning_rate": 9.642527755859134e-06, "loss": 1.139, "step": 2992 }, { "epoch": 0.825655172413793, "grad_norm": 3.586066722869873, "learning_rate": 9.642188925089994e-06, "loss": 1.4033, "step": 2993 }, { "epoch": 0.8259310344827586, "grad_norm": 3.443357229232788, "learning_rate": 9.641849939775075e-06, "loss": 1.3839, "step": 2994 }, { "epoch": 0.8262068965517242, "grad_norm": 3.3608551025390625, "learning_rate": 9.641510799925662e-06, "loss": 1.2, "step": 2995 }, { "epoch": 0.8264827586206897, "grad_norm": 3.5778727531433105, "learning_rate": 9.641171505553045e-06, "loss": 1.1189, "step": 2996 }, { "epoch": 0.8267586206896552, "grad_norm": 3.158844470977783, "learning_rate": 9.64083205666852e-06, "loss": 1.106, "step": 2997 }, { "epoch": 0.8270344827586207, "grad_norm": 3.6303212642669678, "learning_rate": 9.64049245328339e-06, "loss": 1.2232, "step": 2998 }, { "epoch": 0.8273103448275863, "grad_norm": 3.7639925479888916, "learning_rate": 9.640152695408957e-06, "loss": 1.2972, "step": 2999 }, { "epoch": 0.8275862068965517, "grad_norm": 3.4607203006744385, "learning_rate": 9.639812783056537e-06, "loss": 1.0527, "step": 3000 }, { "epoch": 0.8275862068965517, "eval_loss": 1.261593222618103, "eval_runtime": 11.2238, "eval_samples_per_second": 35.639, "eval_steps_per_second": 4.455, "step": 3000 }, { "epoch": 0.8278620689655173, "grad_norm": 4.074929237365723, "learning_rate": 9.639472716237441e-06, "loss": 1.5582, "step": 3001 }, { "epoch": 0.8281379310344827, "grad_norm": 3.3602261543273926, "learning_rate": 9.639132494962993e-06, "loss": 1.183, "step": 3002 }, { "epoch": 0.8284137931034483, "grad_norm": 3.4768834114074707, "learning_rate": 9.63879211924452e-06, "loss": 1.2374, "step": 3003 }, { "epoch": 0.8286896551724138, "grad_norm": 3.332768201828003, "learning_rate": 9.638451589093351e-06, "loss": 1.1753, "step": 3004 }, { "epoch": 0.8289655172413793, "grad_norm": 4.011821746826172, "learning_rate": 9.638110904520827e-06, "loss": 1.2214, "step": 3005 }, { "epoch": 0.8292413793103448, "grad_norm": 3.87518310546875, "learning_rate": 9.637770065538287e-06, "loss": 1.3883, "step": 3006 }, { "epoch": 0.8295172413793104, "grad_norm": 3.7135941982269287, "learning_rate": 9.637429072157078e-06, "loss": 1.388, "step": 3007 }, { "epoch": 0.8297931034482758, "grad_norm": 3.858281135559082, "learning_rate": 9.637087924388552e-06, "loss": 1.3178, "step": 3008 }, { "epoch": 0.8300689655172414, "grad_norm": 3.1404037475585938, "learning_rate": 9.636746622244069e-06, "loss": 1.2843, "step": 3009 }, { "epoch": 0.8303448275862069, "grad_norm": 3.4775705337524414, "learning_rate": 9.63640516573499e-06, "loss": 1.1668, "step": 3010 }, { "epoch": 0.8306206896551724, "grad_norm": 3.430506944656372, "learning_rate": 9.63606355487268e-06, "loss": 1.1495, "step": 3011 }, { "epoch": 0.8308965517241379, "grad_norm": 3.49544095993042, "learning_rate": 9.635721789668516e-06, "loss": 1.1885, "step": 3012 }, { "epoch": 0.8311724137931035, "grad_norm": 3.663717269897461, "learning_rate": 9.635379870133874e-06, "loss": 1.2695, "step": 3013 }, { "epoch": 0.8314482758620689, "grad_norm": 3.341658115386963, "learning_rate": 9.635037796280135e-06, "loss": 1.2205, "step": 3014 }, { "epoch": 0.8317241379310345, "grad_norm": 3.358872175216675, "learning_rate": 9.634695568118692e-06, "loss": 1.2884, "step": 3015 }, { "epoch": 0.832, "grad_norm": 3.368006706237793, "learning_rate": 9.634353185660933e-06, "loss": 1.1938, "step": 3016 }, { "epoch": 0.8322758620689655, "grad_norm": 3.484983444213867, "learning_rate": 9.634010648918262e-06, "loss": 1.1991, "step": 3017 }, { "epoch": 0.832551724137931, "grad_norm": 3.762089967727661, "learning_rate": 9.633667957902078e-06, "loss": 1.4611, "step": 3018 }, { "epoch": 0.8328275862068966, "grad_norm": 3.45200777053833, "learning_rate": 9.63332511262379e-06, "loss": 1.4052, "step": 3019 }, { "epoch": 0.833103448275862, "grad_norm": 3.843175172805786, "learning_rate": 9.632982113094814e-06, "loss": 1.2713, "step": 3020 }, { "epoch": 0.8333793103448276, "grad_norm": 3.565997838973999, "learning_rate": 9.632638959326567e-06, "loss": 1.2193, "step": 3021 }, { "epoch": 0.833655172413793, "grad_norm": 3.7694058418273926, "learning_rate": 9.632295651330476e-06, "loss": 1.2463, "step": 3022 }, { "epoch": 0.8339310344827586, "grad_norm": 3.4591965675354004, "learning_rate": 9.631952189117968e-06, "loss": 1.2194, "step": 3023 }, { "epoch": 0.8342068965517241, "grad_norm": 3.801712989807129, "learning_rate": 9.63160857270048e-06, "loss": 1.3258, "step": 3024 }, { "epoch": 0.8344827586206897, "grad_norm": 3.1947214603424072, "learning_rate": 9.631264802089445e-06, "loss": 1.0735, "step": 3025 }, { "epoch": 0.8347586206896551, "grad_norm": 3.605628252029419, "learning_rate": 9.630920877296316e-06, "loss": 1.2044, "step": 3026 }, { "epoch": 0.8350344827586207, "grad_norm": 3.3585922718048096, "learning_rate": 9.630576798332537e-06, "loss": 1.1743, "step": 3027 }, { "epoch": 0.8353103448275863, "grad_norm": 3.8514411449432373, "learning_rate": 9.630232565209565e-06, "loss": 1.2416, "step": 3028 }, { "epoch": 0.8355862068965517, "grad_norm": 3.3820362091064453, "learning_rate": 9.62988817793886e-06, "loss": 1.2494, "step": 3029 }, { "epoch": 0.8358620689655173, "grad_norm": 3.4113380908966064, "learning_rate": 9.629543636531888e-06, "loss": 1.1715, "step": 3030 }, { "epoch": 0.8361379310344828, "grad_norm": 3.612791061401367, "learning_rate": 9.629198941000118e-06, "loss": 1.1051, "step": 3031 }, { "epoch": 0.8364137931034483, "grad_norm": 3.5412352085113525, "learning_rate": 9.628854091355025e-06, "loss": 1.3367, "step": 3032 }, { "epoch": 0.8366896551724138, "grad_norm": 3.270270824432373, "learning_rate": 9.628509087608092e-06, "loss": 1.1891, "step": 3033 }, { "epoch": 0.8369655172413794, "grad_norm": 3.833127498626709, "learning_rate": 9.628163929770801e-06, "loss": 1.3135, "step": 3034 }, { "epoch": 0.8372413793103448, "grad_norm": 3.8826684951782227, "learning_rate": 9.627818617854647e-06, "loss": 1.2934, "step": 3035 }, { "epoch": 0.8375172413793104, "grad_norm": 3.6517772674560547, "learning_rate": 9.627473151871124e-06, "loss": 1.2306, "step": 3036 }, { "epoch": 0.8377931034482758, "grad_norm": 3.7963600158691406, "learning_rate": 9.627127531831732e-06, "loss": 1.1897, "step": 3037 }, { "epoch": 0.8380689655172414, "grad_norm": 3.527907609939575, "learning_rate": 9.626781757747979e-06, "loss": 1.2488, "step": 3038 }, { "epoch": 0.8383448275862069, "grad_norm": 3.1539463996887207, "learning_rate": 9.626435829631376e-06, "loss": 1.2548, "step": 3039 }, { "epoch": 0.8386206896551724, "grad_norm": 3.4147119522094727, "learning_rate": 9.62608974749344e-06, "loss": 1.4439, "step": 3040 }, { "epoch": 0.8388965517241379, "grad_norm": 3.3309717178344727, "learning_rate": 9.62574351134569e-06, "loss": 1.2553, "step": 3041 }, { "epoch": 0.8391724137931035, "grad_norm": 3.650853157043457, "learning_rate": 9.625397121199655e-06, "loss": 1.2645, "step": 3042 }, { "epoch": 0.8394482758620689, "grad_norm": 3.6695456504821777, "learning_rate": 9.625050577066867e-06, "loss": 1.3327, "step": 3043 }, { "epoch": 0.8397241379310345, "grad_norm": 3.44952392578125, "learning_rate": 9.624703878958862e-06, "loss": 1.2037, "step": 3044 }, { "epoch": 0.84, "grad_norm": 3.4493086338043213, "learning_rate": 9.624357026887183e-06, "loss": 1.1051, "step": 3045 }, { "epoch": 0.8402758620689655, "grad_norm": 3.4424097537994385, "learning_rate": 9.624010020863377e-06, "loss": 1.1534, "step": 3046 }, { "epoch": 0.840551724137931, "grad_norm": 3.735222101211548, "learning_rate": 9.623662860898995e-06, "loss": 1.259, "step": 3047 }, { "epoch": 0.8408275862068966, "grad_norm": 3.630280017852783, "learning_rate": 9.623315547005596e-06, "loss": 1.2331, "step": 3048 }, { "epoch": 0.841103448275862, "grad_norm": 3.4499709606170654, "learning_rate": 9.622968079194743e-06, "loss": 1.1527, "step": 3049 }, { "epoch": 0.8413793103448276, "grad_norm": 3.275527000427246, "learning_rate": 9.622620457478004e-06, "loss": 1.2634, "step": 3050 }, { "epoch": 0.8416551724137931, "grad_norm": 3.343935966491699, "learning_rate": 9.622272681866948e-06, "loss": 1.1805, "step": 3051 }, { "epoch": 0.8419310344827586, "grad_norm": 3.7564103603363037, "learning_rate": 9.621924752373159e-06, "loss": 1.3081, "step": 3052 }, { "epoch": 0.8422068965517241, "grad_norm": 3.4055964946746826, "learning_rate": 9.621576669008217e-06, "loss": 1.1862, "step": 3053 }, { "epoch": 0.8424827586206897, "grad_norm": 4.088786602020264, "learning_rate": 9.621228431783709e-06, "loss": 1.2746, "step": 3054 }, { "epoch": 0.8427586206896551, "grad_norm": 3.7431960105895996, "learning_rate": 9.620880040711229e-06, "loss": 1.1922, "step": 3055 }, { "epoch": 0.8430344827586207, "grad_norm": 3.9021031856536865, "learning_rate": 9.620531495802377e-06, "loss": 1.1485, "step": 3056 }, { "epoch": 0.8433103448275862, "grad_norm": 3.618971109390259, "learning_rate": 9.620182797068756e-06, "loss": 1.11, "step": 3057 }, { "epoch": 0.8435862068965517, "grad_norm": 3.8943116664886475, "learning_rate": 9.619833944521975e-06, "loss": 1.1805, "step": 3058 }, { "epoch": 0.8438620689655172, "grad_norm": 3.8246169090270996, "learning_rate": 9.619484938173646e-06, "loss": 1.372, "step": 3059 }, { "epoch": 0.8441379310344828, "grad_norm": 3.7654836177825928, "learning_rate": 9.619135778035391e-06, "loss": 1.2493, "step": 3060 }, { "epoch": 0.8444137931034483, "grad_norm": 3.4744303226470947, "learning_rate": 9.618786464118832e-06, "loss": 1.2887, "step": 3061 }, { "epoch": 0.8446896551724138, "grad_norm": 3.9514105319976807, "learning_rate": 9.618436996435598e-06, "loss": 1.3032, "step": 3062 }, { "epoch": 0.8449655172413794, "grad_norm": 3.624908924102783, "learning_rate": 9.618087374997325e-06, "loss": 1.2438, "step": 3063 }, { "epoch": 0.8452413793103448, "grad_norm": 3.3930065631866455, "learning_rate": 9.61773759981565e-06, "loss": 1.2021, "step": 3064 }, { "epoch": 0.8455172413793104, "grad_norm": 4.0089311599731445, "learning_rate": 9.617387670902219e-06, "loss": 1.2956, "step": 3065 }, { "epoch": 0.8457931034482759, "grad_norm": 3.624481439590454, "learning_rate": 9.617037588268682e-06, "loss": 1.2913, "step": 3066 }, { "epoch": 0.8460689655172414, "grad_norm": 3.694326639175415, "learning_rate": 9.616687351926694e-06, "loss": 1.2617, "step": 3067 }, { "epoch": 0.8463448275862069, "grad_norm": 3.657409191131592, "learning_rate": 9.616336961887914e-06, "loss": 1.3552, "step": 3068 }, { "epoch": 0.8466206896551725, "grad_norm": 3.5715126991271973, "learning_rate": 9.615986418164009e-06, "loss": 1.2297, "step": 3069 }, { "epoch": 0.8468965517241379, "grad_norm": 3.734299421310425, "learning_rate": 9.615635720766644e-06, "loss": 1.397, "step": 3070 }, { "epoch": 0.8471724137931035, "grad_norm": 3.63893985748291, "learning_rate": 9.615284869707501e-06, "loss": 1.4977, "step": 3071 }, { "epoch": 0.847448275862069, "grad_norm": 3.6230976581573486, "learning_rate": 9.614933864998257e-06, "loss": 1.3197, "step": 3072 }, { "epoch": 0.8477241379310345, "grad_norm": 3.588839054107666, "learning_rate": 9.614582706650598e-06, "loss": 1.1875, "step": 3073 }, { "epoch": 0.848, "grad_norm": 3.2522504329681396, "learning_rate": 9.614231394676213e-06, "loss": 1.1602, "step": 3074 }, { "epoch": 0.8482758620689655, "grad_norm": 3.8092997074127197, "learning_rate": 9.613879929086798e-06, "loss": 1.3386, "step": 3075 }, { "epoch": 0.848551724137931, "grad_norm": 3.5930731296539307, "learning_rate": 9.613528309894056e-06, "loss": 1.2734, "step": 3076 }, { "epoch": 0.8488275862068966, "grad_norm": 3.340459108352661, "learning_rate": 9.613176537109693e-06, "loss": 1.2843, "step": 3077 }, { "epoch": 0.849103448275862, "grad_norm": 3.5279574394226074, "learning_rate": 9.612824610745418e-06, "loss": 1.1882, "step": 3078 }, { "epoch": 0.8493793103448276, "grad_norm": 3.628519296646118, "learning_rate": 9.612472530812949e-06, "loss": 1.3255, "step": 3079 }, { "epoch": 0.8496551724137931, "grad_norm": 3.7329087257385254, "learning_rate": 9.612120297324006e-06, "loss": 1.1949, "step": 3080 }, { "epoch": 0.8499310344827586, "grad_norm": 3.5227158069610596, "learning_rate": 9.611767910290314e-06, "loss": 1.3358, "step": 3081 }, { "epoch": 0.8502068965517241, "grad_norm": 3.8667519092559814, "learning_rate": 9.611415369723609e-06, "loss": 1.2212, "step": 3082 }, { "epoch": 0.8504827586206897, "grad_norm": 3.3041298389434814, "learning_rate": 9.611062675635623e-06, "loss": 1.1842, "step": 3083 }, { "epoch": 0.8507586206896551, "grad_norm": 3.6706702709198, "learning_rate": 9.6107098280381e-06, "loss": 1.193, "step": 3084 }, { "epoch": 0.8510344827586207, "grad_norm": 4.011791706085205, "learning_rate": 9.610356826942788e-06, "loss": 1.3259, "step": 3085 }, { "epoch": 0.8513103448275862, "grad_norm": 4.0871992111206055, "learning_rate": 9.610003672361436e-06, "loss": 1.2794, "step": 3086 }, { "epoch": 0.8515862068965517, "grad_norm": 3.419118642807007, "learning_rate": 9.609650364305803e-06, "loss": 1.2372, "step": 3087 }, { "epoch": 0.8518620689655172, "grad_norm": 3.7999489307403564, "learning_rate": 9.609296902787653e-06, "loss": 1.1905, "step": 3088 }, { "epoch": 0.8521379310344828, "grad_norm": 3.968339681625366, "learning_rate": 9.608943287818748e-06, "loss": 1.2774, "step": 3089 }, { "epoch": 0.8524137931034482, "grad_norm": 3.3703982830047607, "learning_rate": 9.608589519410864e-06, "loss": 1.1989, "step": 3090 }, { "epoch": 0.8526896551724138, "grad_norm": 3.592784881591797, "learning_rate": 9.608235597575779e-06, "loss": 1.2522, "step": 3091 }, { "epoch": 0.8529655172413793, "grad_norm": 3.545248508453369, "learning_rate": 9.607881522325276e-06, "loss": 1.2485, "step": 3092 }, { "epoch": 0.8532413793103448, "grad_norm": 3.687314033508301, "learning_rate": 9.607527293671139e-06, "loss": 1.1542, "step": 3093 }, { "epoch": 0.8535172413793104, "grad_norm": 3.9167850017547607, "learning_rate": 9.607172911625163e-06, "loss": 1.3865, "step": 3094 }, { "epoch": 0.8537931034482759, "grad_norm": 3.450343132019043, "learning_rate": 9.606818376199147e-06, "loss": 1.2657, "step": 3095 }, { "epoch": 0.8540689655172414, "grad_norm": 3.5390310287475586, "learning_rate": 9.606463687404891e-06, "loss": 1.2247, "step": 3096 }, { "epoch": 0.8543448275862069, "grad_norm": 3.6538188457489014, "learning_rate": 9.60610884525421e-06, "loss": 1.3408, "step": 3097 }, { "epoch": 0.8546206896551725, "grad_norm": 3.4386661052703857, "learning_rate": 9.605753849758908e-06, "loss": 1.2323, "step": 3098 }, { "epoch": 0.8548965517241379, "grad_norm": 3.520425796508789, "learning_rate": 9.60539870093081e-06, "loss": 1.3088, "step": 3099 }, { "epoch": 0.8551724137931035, "grad_norm": 3.6927731037139893, "learning_rate": 9.605043398781737e-06, "loss": 1.3584, "step": 3100 }, { "epoch": 0.855448275862069, "grad_norm": 3.8383376598358154, "learning_rate": 9.604687943323518e-06, "loss": 1.385, "step": 3101 }, { "epoch": 0.8557241379310345, "grad_norm": 3.2329654693603516, "learning_rate": 9.604332334567987e-06, "loss": 1.1941, "step": 3102 }, { "epoch": 0.856, "grad_norm": 3.190814971923828, "learning_rate": 9.603976572526982e-06, "loss": 1.1775, "step": 3103 }, { "epoch": 0.8562758620689656, "grad_norm": 3.4075558185577393, "learning_rate": 9.603620657212347e-06, "loss": 1.2224, "step": 3104 }, { "epoch": 0.856551724137931, "grad_norm": 3.3192050457000732, "learning_rate": 9.603264588635933e-06, "loss": 1.449, "step": 3105 }, { "epoch": 0.8568275862068966, "grad_norm": 3.587496280670166, "learning_rate": 9.602908366809589e-06, "loss": 1.1972, "step": 3106 }, { "epoch": 0.857103448275862, "grad_norm": 3.3893027305603027, "learning_rate": 9.60255199174518e-06, "loss": 1.2537, "step": 3107 }, { "epoch": 0.8573793103448276, "grad_norm": 3.059473752975464, "learning_rate": 9.602195463454567e-06, "loss": 1.1179, "step": 3108 }, { "epoch": 0.8576551724137931, "grad_norm": 3.518882989883423, "learning_rate": 9.60183878194962e-06, "loss": 1.2883, "step": 3109 }, { "epoch": 0.8579310344827586, "grad_norm": 3.3172714710235596, "learning_rate": 9.601481947242213e-06, "loss": 1.217, "step": 3110 }, { "epoch": 0.8582068965517241, "grad_norm": 3.5399057865142822, "learning_rate": 9.601124959344228e-06, "loss": 1.3108, "step": 3111 }, { "epoch": 0.8584827586206897, "grad_norm": 3.4675040245056152, "learning_rate": 9.600767818267547e-06, "loss": 1.1902, "step": 3112 }, { "epoch": 0.8587586206896551, "grad_norm": 3.550144672393799, "learning_rate": 9.60041052402406e-06, "loss": 1.3067, "step": 3113 }, { "epoch": 0.8590344827586207, "grad_norm": 3.770336151123047, "learning_rate": 9.600053076625665e-06, "loss": 1.227, "step": 3114 }, { "epoch": 0.8593103448275862, "grad_norm": 3.409184217453003, "learning_rate": 9.599695476084257e-06, "loss": 1.1831, "step": 3115 }, { "epoch": 0.8595862068965517, "grad_norm": 3.7795569896698, "learning_rate": 9.599337722411745e-06, "loss": 1.2017, "step": 3116 }, { "epoch": 0.8598620689655172, "grad_norm": 3.5291810035705566, "learning_rate": 9.598979815620038e-06, "loss": 1.3394, "step": 3117 }, { "epoch": 0.8601379310344828, "grad_norm": 3.5307445526123047, "learning_rate": 9.59862175572105e-06, "loss": 1.1432, "step": 3118 }, { "epoch": 0.8604137931034482, "grad_norm": 3.991224765777588, "learning_rate": 9.598263542726703e-06, "loss": 1.33, "step": 3119 }, { "epoch": 0.8606896551724138, "grad_norm": 3.6143293380737305, "learning_rate": 9.597905176648923e-06, "loss": 1.3027, "step": 3120 }, { "epoch": 0.8609655172413793, "grad_norm": 3.5434956550598145, "learning_rate": 9.597546657499639e-06, "loss": 1.256, "step": 3121 }, { "epoch": 0.8612413793103448, "grad_norm": 3.4678287506103516, "learning_rate": 9.597187985290786e-06, "loss": 1.2457, "step": 3122 }, { "epoch": 0.8615172413793103, "grad_norm": 3.9092447757720947, "learning_rate": 9.596829160034307e-06, "loss": 1.3259, "step": 3123 }, { "epoch": 0.8617931034482759, "grad_norm": 3.93876314163208, "learning_rate": 9.596470181742149e-06, "loss": 1.3917, "step": 3124 }, { "epoch": 0.8620689655172413, "grad_norm": 3.199347972869873, "learning_rate": 9.596111050426258e-06, "loss": 1.3004, "step": 3125 }, { "epoch": 0.8623448275862069, "grad_norm": 3.3754289150238037, "learning_rate": 9.595751766098593e-06, "loss": 1.2019, "step": 3126 }, { "epoch": 0.8626206896551725, "grad_norm": 3.4596619606018066, "learning_rate": 9.595392328771117e-06, "loss": 1.3245, "step": 3127 }, { "epoch": 0.8628965517241379, "grad_norm": 3.73744797706604, "learning_rate": 9.595032738455791e-06, "loss": 1.1843, "step": 3128 }, { "epoch": 0.8631724137931035, "grad_norm": 3.6582627296447754, "learning_rate": 9.594672995164593e-06, "loss": 1.2369, "step": 3129 }, { "epoch": 0.863448275862069, "grad_norm": 3.6449437141418457, "learning_rate": 9.594313098909494e-06, "loss": 1.2334, "step": 3130 }, { "epoch": 0.8637241379310345, "grad_norm": 3.257354259490967, "learning_rate": 9.593953049702479e-06, "loss": 1.138, "step": 3131 }, { "epoch": 0.864, "grad_norm": 3.660043239593506, "learning_rate": 9.59359284755553e-06, "loss": 1.2753, "step": 3132 }, { "epoch": 0.8642758620689656, "grad_norm": 3.807157278060913, "learning_rate": 9.593232492480645e-06, "loss": 1.237, "step": 3133 }, { "epoch": 0.864551724137931, "grad_norm": 3.451781749725342, "learning_rate": 9.592871984489816e-06, "loss": 1.3074, "step": 3134 }, { "epoch": 0.8648275862068966, "grad_norm": 3.8246493339538574, "learning_rate": 9.592511323595048e-06, "loss": 1.4022, "step": 3135 }, { "epoch": 0.865103448275862, "grad_norm": 3.8542094230651855, "learning_rate": 9.592150509808344e-06, "loss": 1.4582, "step": 3136 }, { "epoch": 0.8653793103448276, "grad_norm": 3.6567542552948, "learning_rate": 9.59178954314172e-06, "loss": 1.4282, "step": 3137 }, { "epoch": 0.8656551724137931, "grad_norm": 3.6269724369049072, "learning_rate": 9.591428423607192e-06, "loss": 1.2102, "step": 3138 }, { "epoch": 0.8659310344827587, "grad_norm": 3.750521421432495, "learning_rate": 9.591067151216782e-06, "loss": 1.2586, "step": 3139 }, { "epoch": 0.8662068965517241, "grad_norm": 3.5806360244750977, "learning_rate": 9.590705725982517e-06, "loss": 1.2326, "step": 3140 }, { "epoch": 0.8664827586206897, "grad_norm": 3.4283225536346436, "learning_rate": 9.590344147916429e-06, "loss": 1.2281, "step": 3141 }, { "epoch": 0.8667586206896551, "grad_norm": 3.2604925632476807, "learning_rate": 9.589982417030556e-06, "loss": 1.2464, "step": 3142 }, { "epoch": 0.8670344827586207, "grad_norm": 3.572087049484253, "learning_rate": 9.589620533336942e-06, "loss": 1.368, "step": 3143 }, { "epoch": 0.8673103448275862, "grad_norm": 3.3857340812683105, "learning_rate": 9.589258496847633e-06, "loss": 1.2946, "step": 3144 }, { "epoch": 0.8675862068965517, "grad_norm": 3.4563021659851074, "learning_rate": 9.588896307574681e-06, "loss": 1.2675, "step": 3145 }, { "epoch": 0.8678620689655172, "grad_norm": 3.8174874782562256, "learning_rate": 9.588533965530146e-06, "loss": 1.4291, "step": 3146 }, { "epoch": 0.8681379310344828, "grad_norm": 3.1128041744232178, "learning_rate": 9.58817147072609e-06, "loss": 1.1298, "step": 3147 }, { "epoch": 0.8684137931034482, "grad_norm": 3.5496416091918945, "learning_rate": 9.58780882317458e-06, "loss": 1.2046, "step": 3148 }, { "epoch": 0.8686896551724138, "grad_norm": 3.6199567317962646, "learning_rate": 9.587446022887692e-06, "loss": 1.2246, "step": 3149 }, { "epoch": 0.8689655172413793, "grad_norm": 3.9319076538085938, "learning_rate": 9.587083069877501e-06, "loss": 1.2884, "step": 3150 }, { "epoch": 0.8692413793103448, "grad_norm": 3.344435930252075, "learning_rate": 9.58671996415609e-06, "loss": 1.2073, "step": 3151 }, { "epoch": 0.8695172413793103, "grad_norm": 3.7600810527801514, "learning_rate": 9.586356705735551e-06, "loss": 1.2304, "step": 3152 }, { "epoch": 0.8697931034482759, "grad_norm": 3.986220359802246, "learning_rate": 9.585993294627975e-06, "loss": 1.4596, "step": 3153 }, { "epoch": 0.8700689655172413, "grad_norm": 3.6853764057159424, "learning_rate": 9.585629730845461e-06, "loss": 1.1795, "step": 3154 }, { "epoch": 0.8703448275862069, "grad_norm": 3.9566662311553955, "learning_rate": 9.585266014400112e-06, "loss": 1.248, "step": 3155 }, { "epoch": 0.8706206896551724, "grad_norm": 3.2977006435394287, "learning_rate": 9.584902145304037e-06, "loss": 1.2849, "step": 3156 }, { "epoch": 0.8708965517241379, "grad_norm": 3.776054620742798, "learning_rate": 9.58453812356935e-06, "loss": 1.3817, "step": 3157 }, { "epoch": 0.8711724137931034, "grad_norm": 3.804626226425171, "learning_rate": 9.584173949208172e-06, "loss": 1.3072, "step": 3158 }, { "epoch": 0.871448275862069, "grad_norm": 3.5277099609375, "learning_rate": 9.583809622232623e-06, "loss": 1.1887, "step": 3159 }, { "epoch": 0.8717241379310345, "grad_norm": 3.382887840270996, "learning_rate": 9.583445142654834e-06, "loss": 1.1832, "step": 3160 }, { "epoch": 0.872, "grad_norm": 3.4829442501068115, "learning_rate": 9.58308051048694e-06, "loss": 1.2561, "step": 3161 }, { "epoch": 0.8722758620689656, "grad_norm": 3.842151641845703, "learning_rate": 9.582715725741077e-06, "loss": 1.4689, "step": 3162 }, { "epoch": 0.872551724137931, "grad_norm": 3.3378028869628906, "learning_rate": 9.582350788429392e-06, "loss": 1.1971, "step": 3163 }, { "epoch": 0.8728275862068966, "grad_norm": 3.76741886138916, "learning_rate": 9.581985698564033e-06, "loss": 1.3183, "step": 3164 }, { "epoch": 0.8731034482758621, "grad_norm": 3.408555030822754, "learning_rate": 9.581620456157157e-06, "loss": 1.1762, "step": 3165 }, { "epoch": 0.8733793103448276, "grad_norm": 3.9329113960266113, "learning_rate": 9.58125506122092e-06, "loss": 1.248, "step": 3166 }, { "epoch": 0.8736551724137931, "grad_norm": 3.4869556427001953, "learning_rate": 9.580889513767489e-06, "loss": 1.1923, "step": 3167 }, { "epoch": 0.8739310344827587, "grad_norm": 3.4187183380126953, "learning_rate": 9.580523813809032e-06, "loss": 1.2728, "step": 3168 }, { "epoch": 0.8742068965517241, "grad_norm": 3.485503911972046, "learning_rate": 9.580157961357724e-06, "loss": 1.2275, "step": 3169 }, { "epoch": 0.8744827586206897, "grad_norm": 3.2384910583496094, "learning_rate": 9.579791956425746e-06, "loss": 1.1584, "step": 3170 }, { "epoch": 0.8747586206896552, "grad_norm": 3.438499927520752, "learning_rate": 9.579425799025283e-06, "loss": 1.1518, "step": 3171 }, { "epoch": 0.8750344827586207, "grad_norm": 3.4769325256347656, "learning_rate": 9.579059489168524e-06, "loss": 1.0746, "step": 3172 }, { "epoch": 0.8753103448275862, "grad_norm": 3.929135799407959, "learning_rate": 9.578693026867664e-06, "loss": 1.2621, "step": 3173 }, { "epoch": 0.8755862068965518, "grad_norm": 3.2267963886260986, "learning_rate": 9.578326412134903e-06, "loss": 1.1071, "step": 3174 }, { "epoch": 0.8758620689655172, "grad_norm": 3.898585796356201, "learning_rate": 9.577959644982447e-06, "loss": 1.3864, "step": 3175 }, { "epoch": 0.8761379310344828, "grad_norm": 3.7737860679626465, "learning_rate": 9.577592725422504e-06, "loss": 1.3938, "step": 3176 }, { "epoch": 0.8764137931034482, "grad_norm": 3.3141746520996094, "learning_rate": 9.577225653467293e-06, "loss": 1.1419, "step": 3177 }, { "epoch": 0.8766896551724138, "grad_norm": 3.6061148643493652, "learning_rate": 9.576858429129032e-06, "loss": 1.2409, "step": 3178 }, { "epoch": 0.8769655172413793, "grad_norm": 3.658414602279663, "learning_rate": 9.576491052419948e-06, "loss": 1.2165, "step": 3179 }, { "epoch": 0.8772413793103448, "grad_norm": 3.5088109970092773, "learning_rate": 9.576123523352267e-06, "loss": 1.1716, "step": 3180 }, { "epoch": 0.8775172413793103, "grad_norm": 3.5573372840881348, "learning_rate": 9.575755841938231e-06, "loss": 1.2721, "step": 3181 }, { "epoch": 0.8777931034482759, "grad_norm": 3.503406047821045, "learning_rate": 9.575388008190077e-06, "loss": 1.1105, "step": 3182 }, { "epoch": 0.8780689655172413, "grad_norm": 4.129842758178711, "learning_rate": 9.575020022120052e-06, "loss": 1.5029, "step": 3183 }, { "epoch": 0.8783448275862069, "grad_norm": 3.41748046875, "learning_rate": 9.574651883740407e-06, "loss": 1.1991, "step": 3184 }, { "epoch": 0.8786206896551724, "grad_norm": 3.794509172439575, "learning_rate": 9.574283593063395e-06, "loss": 1.3262, "step": 3185 }, { "epoch": 0.8788965517241379, "grad_norm": 3.6735944747924805, "learning_rate": 9.573915150101283e-06, "loss": 1.2964, "step": 3186 }, { "epoch": 0.8791724137931034, "grad_norm": 3.4296205043792725, "learning_rate": 9.57354655486633e-06, "loss": 1.197, "step": 3187 }, { "epoch": 0.879448275862069, "grad_norm": 3.4255757331848145, "learning_rate": 9.573177807370813e-06, "loss": 1.2697, "step": 3188 }, { "epoch": 0.8797241379310344, "grad_norm": 3.5769245624542236, "learning_rate": 9.572808907627002e-06, "loss": 1.2692, "step": 3189 }, { "epoch": 0.88, "grad_norm": 3.44024920463562, "learning_rate": 9.572439855647186e-06, "loss": 1.1908, "step": 3190 }, { "epoch": 0.8802758620689655, "grad_norm": 4.431703090667725, "learning_rate": 9.572070651443645e-06, "loss": 1.338, "step": 3191 }, { "epoch": 0.880551724137931, "grad_norm": 3.46419620513916, "learning_rate": 9.571701295028673e-06, "loss": 1.2932, "step": 3192 }, { "epoch": 0.8808275862068966, "grad_norm": 3.69352650642395, "learning_rate": 9.571331786414566e-06, "loss": 1.1755, "step": 3193 }, { "epoch": 0.8811034482758621, "grad_norm": 3.4771528244018555, "learning_rate": 9.570962125613625e-06, "loss": 1.2482, "step": 3194 }, { "epoch": 0.8813793103448276, "grad_norm": 3.5651237964630127, "learning_rate": 9.570592312638158e-06, "loss": 1.2395, "step": 3195 }, { "epoch": 0.8816551724137931, "grad_norm": 3.7023370265960693, "learning_rate": 9.570222347500475e-06, "loss": 1.3085, "step": 3196 }, { "epoch": 0.8819310344827587, "grad_norm": 3.888033390045166, "learning_rate": 9.569852230212894e-06, "loss": 1.3129, "step": 3197 }, { "epoch": 0.8822068965517241, "grad_norm": 3.6959471702575684, "learning_rate": 9.569481960787736e-06, "loss": 1.1729, "step": 3198 }, { "epoch": 0.8824827586206897, "grad_norm": 3.8137526512145996, "learning_rate": 9.56911153923733e-06, "loss": 1.322, "step": 3199 }, { "epoch": 0.8827586206896552, "grad_norm": 3.9738686084747314, "learning_rate": 9.568740965574005e-06, "loss": 1.4046, "step": 3200 }, { "epoch": 0.8830344827586207, "grad_norm": 3.778623104095459, "learning_rate": 9.568370239810099e-06, "loss": 1.1611, "step": 3201 }, { "epoch": 0.8833103448275862, "grad_norm": 3.7447216510772705, "learning_rate": 9.567999361957954e-06, "loss": 1.3592, "step": 3202 }, { "epoch": 0.8835862068965518, "grad_norm": 3.8256421089172363, "learning_rate": 9.567628332029917e-06, "loss": 1.2064, "step": 3203 }, { "epoch": 0.8838620689655172, "grad_norm": 3.550814151763916, "learning_rate": 9.56725715003834e-06, "loss": 1.2862, "step": 3204 }, { "epoch": 0.8841379310344828, "grad_norm": 3.7916483879089355, "learning_rate": 9.566885815995582e-06, "loss": 1.2768, "step": 3205 }, { "epoch": 0.8844137931034483, "grad_norm": 3.709059953689575, "learning_rate": 9.566514329914002e-06, "loss": 1.2962, "step": 3206 }, { "epoch": 0.8846896551724138, "grad_norm": 3.600689172744751, "learning_rate": 9.566142691805974e-06, "loss": 1.296, "step": 3207 }, { "epoch": 0.8849655172413793, "grad_norm": 3.7938857078552246, "learning_rate": 9.56577090168386e-06, "loss": 1.3931, "step": 3208 }, { "epoch": 0.8852413793103449, "grad_norm": 3.6194348335266113, "learning_rate": 9.565398959560046e-06, "loss": 1.2513, "step": 3209 }, { "epoch": 0.8855172413793103, "grad_norm": 3.5975077152252197, "learning_rate": 9.565026865446912e-06, "loss": 1.3388, "step": 3210 }, { "epoch": 0.8857931034482759, "grad_norm": 3.2674665451049805, "learning_rate": 9.564654619356843e-06, "loss": 1.1493, "step": 3211 }, { "epoch": 0.8860689655172413, "grad_norm": 3.405819892883301, "learning_rate": 9.564282221302236e-06, "loss": 1.2464, "step": 3212 }, { "epoch": 0.8863448275862069, "grad_norm": 3.284191846847534, "learning_rate": 9.563909671295488e-06, "loss": 1.1658, "step": 3213 }, { "epoch": 0.8866206896551724, "grad_norm": 3.160726547241211, "learning_rate": 9.563536969348998e-06, "loss": 1.1585, "step": 3214 }, { "epoch": 0.886896551724138, "grad_norm": 4.062303066253662, "learning_rate": 9.563164115475177e-06, "loss": 1.3851, "step": 3215 }, { "epoch": 0.8871724137931034, "grad_norm": 3.776263475418091, "learning_rate": 9.562791109686438e-06, "loss": 1.3266, "step": 3216 }, { "epoch": 0.887448275862069, "grad_norm": 3.5236878395080566, "learning_rate": 9.562417951995197e-06, "loss": 1.2569, "step": 3217 }, { "epoch": 0.8877241379310344, "grad_norm": 3.7026984691619873, "learning_rate": 9.562044642413878e-06, "loss": 1.3009, "step": 3218 }, { "epoch": 0.888, "grad_norm": 3.4346859455108643, "learning_rate": 9.56167118095491e-06, "loss": 1.1271, "step": 3219 }, { "epoch": 0.8882758620689655, "grad_norm": 3.3678576946258545, "learning_rate": 9.561297567630725e-06, "loss": 1.1754, "step": 3220 }, { "epoch": 0.888551724137931, "grad_norm": 3.408714771270752, "learning_rate": 9.560923802453762e-06, "loss": 1.0572, "step": 3221 }, { "epoch": 0.8888275862068965, "grad_norm": 3.596501350402832, "learning_rate": 9.560549885436462e-06, "loss": 1.3489, "step": 3222 }, { "epoch": 0.8891034482758621, "grad_norm": 3.3781166076660156, "learning_rate": 9.560175816591278e-06, "loss": 1.2171, "step": 3223 }, { "epoch": 0.8893793103448275, "grad_norm": 3.3525283336639404, "learning_rate": 9.559801595930659e-06, "loss": 1.2528, "step": 3224 }, { "epoch": 0.8896551724137931, "grad_norm": 3.5599632263183594, "learning_rate": 9.559427223467065e-06, "loss": 1.2602, "step": 3225 }, { "epoch": 0.8899310344827587, "grad_norm": 3.914930820465088, "learning_rate": 9.559052699212959e-06, "loss": 1.1262, "step": 3226 }, { "epoch": 0.8902068965517241, "grad_norm": 3.5243611335754395, "learning_rate": 9.55867802318081e-06, "loss": 1.2037, "step": 3227 }, { "epoch": 0.8904827586206897, "grad_norm": 3.5603957176208496, "learning_rate": 9.55830319538309e-06, "loss": 1.3184, "step": 3228 }, { "epoch": 0.8907586206896552, "grad_norm": 3.860563039779663, "learning_rate": 9.557928215832281e-06, "loss": 1.3035, "step": 3229 }, { "epoch": 0.8910344827586207, "grad_norm": 3.708880662918091, "learning_rate": 9.557553084540862e-06, "loss": 1.1937, "step": 3230 }, { "epoch": 0.8913103448275862, "grad_norm": 4.017889499664307, "learning_rate": 9.557177801521328e-06, "loss": 1.2685, "step": 3231 }, { "epoch": 0.8915862068965518, "grad_norm": 3.3972198963165283, "learning_rate": 9.556802366786166e-06, "loss": 1.2555, "step": 3232 }, { "epoch": 0.8918620689655172, "grad_norm": 3.906176805496216, "learning_rate": 9.556426780347878e-06, "loss": 1.2376, "step": 3233 }, { "epoch": 0.8921379310344828, "grad_norm": 3.4824166297912598, "learning_rate": 9.556051042218967e-06, "loss": 1.2988, "step": 3234 }, { "epoch": 0.8924137931034483, "grad_norm": 3.828197479248047, "learning_rate": 9.555675152411945e-06, "loss": 1.313, "step": 3235 }, { "epoch": 0.8926896551724138, "grad_norm": 3.64241361618042, "learning_rate": 9.555299110939322e-06, "loss": 1.2715, "step": 3236 }, { "epoch": 0.8929655172413793, "grad_norm": 3.634537696838379, "learning_rate": 9.55492291781362e-06, "loss": 1.2473, "step": 3237 }, { "epoch": 0.8932413793103449, "grad_norm": 4.045560836791992, "learning_rate": 9.554546573047361e-06, "loss": 1.412, "step": 3238 }, { "epoch": 0.8935172413793103, "grad_norm": 3.5808496475219727, "learning_rate": 9.554170076653074e-06, "loss": 1.1769, "step": 3239 }, { "epoch": 0.8937931034482759, "grad_norm": 3.758955240249634, "learning_rate": 9.553793428643294e-06, "loss": 1.1623, "step": 3240 }, { "epoch": 0.8940689655172414, "grad_norm": 3.1889588832855225, "learning_rate": 9.553416629030561e-06, "loss": 1.1441, "step": 3241 }, { "epoch": 0.8943448275862069, "grad_norm": 3.1525588035583496, "learning_rate": 9.553039677827418e-06, "loss": 1.144, "step": 3242 }, { "epoch": 0.8946206896551724, "grad_norm": 3.3446285724639893, "learning_rate": 9.552662575046414e-06, "loss": 1.0799, "step": 3243 }, { "epoch": 0.894896551724138, "grad_norm": 3.594666004180908, "learning_rate": 9.552285320700105e-06, "loss": 1.2886, "step": 3244 }, { "epoch": 0.8951724137931034, "grad_norm": 3.2625536918640137, "learning_rate": 9.55190791480105e-06, "loss": 1.1512, "step": 3245 }, { "epoch": 0.895448275862069, "grad_norm": 3.9416556358337402, "learning_rate": 9.551530357361812e-06, "loss": 1.14, "step": 3246 }, { "epoch": 0.8957241379310344, "grad_norm": 3.3055360317230225, "learning_rate": 9.55115264839496e-06, "loss": 1.1879, "step": 3247 }, { "epoch": 0.896, "grad_norm": 3.9109835624694824, "learning_rate": 9.550774787913072e-06, "loss": 1.2991, "step": 3248 }, { "epoch": 0.8962758620689655, "grad_norm": 3.187643527984619, "learning_rate": 9.550396775928726e-06, "loss": 1.212, "step": 3249 }, { "epoch": 0.896551724137931, "grad_norm": 3.6677510738372803, "learning_rate": 9.550018612454504e-06, "loss": 1.2231, "step": 3250 }, { "epoch": 0.8968275862068965, "grad_norm": 3.653522253036499, "learning_rate": 9.549640297502998e-06, "loss": 1.2474, "step": 3251 }, { "epoch": 0.8971034482758621, "grad_norm": 3.8597142696380615, "learning_rate": 9.549261831086802e-06, "loss": 1.1961, "step": 3252 }, { "epoch": 0.8973793103448275, "grad_norm": 3.420867443084717, "learning_rate": 9.548883213218518e-06, "loss": 1.0795, "step": 3253 }, { "epoch": 0.8976551724137931, "grad_norm": 3.5095055103302, "learning_rate": 9.548504443910748e-06, "loss": 1.3061, "step": 3254 }, { "epoch": 0.8979310344827586, "grad_norm": 3.558098554611206, "learning_rate": 9.548125523176101e-06, "loss": 1.327, "step": 3255 }, { "epoch": 0.8982068965517241, "grad_norm": 3.505390167236328, "learning_rate": 9.547746451027197e-06, "loss": 1.1854, "step": 3256 }, { "epoch": 0.8984827586206896, "grad_norm": 3.5720434188842773, "learning_rate": 9.54736722747665e-06, "loss": 1.2889, "step": 3257 }, { "epoch": 0.8987586206896552, "grad_norm": 3.8630082607269287, "learning_rate": 9.546987852537089e-06, "loss": 1.2997, "step": 3258 }, { "epoch": 0.8990344827586206, "grad_norm": 3.7338387966156006, "learning_rate": 9.546608326221141e-06, "loss": 1.1899, "step": 3259 }, { "epoch": 0.8993103448275862, "grad_norm": 3.561171054840088, "learning_rate": 9.546228648541445e-06, "loss": 1.2196, "step": 3260 }, { "epoch": 0.8995862068965518, "grad_norm": 3.687507152557373, "learning_rate": 9.545848819510635e-06, "loss": 1.2251, "step": 3261 }, { "epoch": 0.8998620689655172, "grad_norm": 3.5308926105499268, "learning_rate": 9.545468839141363e-06, "loss": 1.2917, "step": 3262 }, { "epoch": 0.9001379310344828, "grad_norm": 3.484478712081909, "learning_rate": 9.545088707446273e-06, "loss": 1.2028, "step": 3263 }, { "epoch": 0.9004137931034483, "grad_norm": 3.630136251449585, "learning_rate": 9.544708424438026e-06, "loss": 1.3273, "step": 3264 }, { "epoch": 0.9006896551724138, "grad_norm": 3.932755708694458, "learning_rate": 9.544327990129277e-06, "loss": 1.2899, "step": 3265 }, { "epoch": 0.9009655172413793, "grad_norm": 3.4232754707336426, "learning_rate": 9.543947404532694e-06, "loss": 1.1662, "step": 3266 }, { "epoch": 0.9012413793103449, "grad_norm": 3.8653576374053955, "learning_rate": 9.543566667660946e-06, "loss": 1.2426, "step": 3267 }, { "epoch": 0.9015172413793103, "grad_norm": 3.310248851776123, "learning_rate": 9.543185779526711e-06, "loss": 1.1832, "step": 3268 }, { "epoch": 0.9017931034482759, "grad_norm": 3.2815890312194824, "learning_rate": 9.542804740142667e-06, "loss": 1.2374, "step": 3269 }, { "epoch": 0.9020689655172414, "grad_norm": 3.2534725666046143, "learning_rate": 9.542423549521498e-06, "loss": 1.1018, "step": 3270 }, { "epoch": 0.9023448275862069, "grad_norm": 3.5189850330352783, "learning_rate": 9.542042207675898e-06, "loss": 1.1783, "step": 3271 }, { "epoch": 0.9026206896551724, "grad_norm": 3.3270115852355957, "learning_rate": 9.541660714618561e-06, "loss": 1.2234, "step": 3272 }, { "epoch": 0.902896551724138, "grad_norm": 3.6889684200286865, "learning_rate": 9.541279070362185e-06, "loss": 1.1429, "step": 3273 }, { "epoch": 0.9031724137931034, "grad_norm": 3.935500383377075, "learning_rate": 9.54089727491948e-06, "loss": 1.2563, "step": 3274 }, { "epoch": 0.903448275862069, "grad_norm": 3.554086208343506, "learning_rate": 9.540515328303154e-06, "loss": 1.316, "step": 3275 }, { "epoch": 0.9037241379310345, "grad_norm": 3.4604620933532715, "learning_rate": 9.540133230525923e-06, "loss": 1.1836, "step": 3276 }, { "epoch": 0.904, "grad_norm": 3.6154651641845703, "learning_rate": 9.539750981600509e-06, "loss": 1.2219, "step": 3277 }, { "epoch": 0.9042758620689655, "grad_norm": 3.3767030239105225, "learning_rate": 9.539368581539634e-06, "loss": 1.1877, "step": 3278 }, { "epoch": 0.9045517241379311, "grad_norm": 4.239593029022217, "learning_rate": 9.538986030356033e-06, "loss": 1.4593, "step": 3279 }, { "epoch": 0.9048275862068965, "grad_norm": 3.761147975921631, "learning_rate": 9.538603328062437e-06, "loss": 1.2439, "step": 3280 }, { "epoch": 0.9051034482758621, "grad_norm": 3.4682462215423584, "learning_rate": 9.538220474671591e-06, "loss": 1.0582, "step": 3281 }, { "epoch": 0.9053793103448275, "grad_norm": 3.450385093688965, "learning_rate": 9.53783747019624e-06, "loss": 1.2323, "step": 3282 }, { "epoch": 0.9056551724137931, "grad_norm": 3.9318337440490723, "learning_rate": 9.537454314649134e-06, "loss": 1.2898, "step": 3283 }, { "epoch": 0.9059310344827586, "grad_norm": 3.4705021381378174, "learning_rate": 9.537071008043028e-06, "loss": 1.2101, "step": 3284 }, { "epoch": 0.9062068965517242, "grad_norm": 3.6316657066345215, "learning_rate": 9.536687550390683e-06, "loss": 1.14, "step": 3285 }, { "epoch": 0.9064827586206896, "grad_norm": 3.602717161178589, "learning_rate": 9.53630394170487e-06, "loss": 1.233, "step": 3286 }, { "epoch": 0.9067586206896552, "grad_norm": 3.4074554443359375, "learning_rate": 9.535920181998352e-06, "loss": 1.2986, "step": 3287 }, { "epoch": 0.9070344827586206, "grad_norm": 3.5979628562927246, "learning_rate": 9.53553627128391e-06, "loss": 1.1658, "step": 3288 }, { "epoch": 0.9073103448275862, "grad_norm": 3.655179738998413, "learning_rate": 9.535152209574322e-06, "loss": 1.303, "step": 3289 }, { "epoch": 0.9075862068965517, "grad_norm": 3.8810410499572754, "learning_rate": 9.534767996882376e-06, "loss": 1.2541, "step": 3290 }, { "epoch": 0.9078620689655172, "grad_norm": 3.8721954822540283, "learning_rate": 9.534383633220865e-06, "loss": 1.4108, "step": 3291 }, { "epoch": 0.9081379310344827, "grad_norm": 3.6471264362335205, "learning_rate": 9.533999118602581e-06, "loss": 1.2964, "step": 3292 }, { "epoch": 0.9084137931034483, "grad_norm": 3.2968616485595703, "learning_rate": 9.533614453040327e-06, "loss": 1.1116, "step": 3293 }, { "epoch": 0.9086896551724138, "grad_norm": 3.442882776260376, "learning_rate": 9.53322963654691e-06, "loss": 1.1457, "step": 3294 }, { "epoch": 0.9089655172413793, "grad_norm": 3.3832719326019287, "learning_rate": 9.532844669135139e-06, "loss": 1.1776, "step": 3295 }, { "epoch": 0.9092413793103449, "grad_norm": 3.2513577938079834, "learning_rate": 9.532459550817831e-06, "loss": 1.1135, "step": 3296 }, { "epoch": 0.9095172413793103, "grad_norm": 3.476329803466797, "learning_rate": 9.532074281607808e-06, "loss": 1.2618, "step": 3297 }, { "epoch": 0.9097931034482759, "grad_norm": 3.647240161895752, "learning_rate": 9.531688861517896e-06, "loss": 1.2518, "step": 3298 }, { "epoch": 0.9100689655172414, "grad_norm": 3.5120785236358643, "learning_rate": 9.531303290560927e-06, "loss": 1.3171, "step": 3299 }, { "epoch": 0.9103448275862069, "grad_norm": 3.149855613708496, "learning_rate": 9.530917568749734e-06, "loss": 1.1108, "step": 3300 }, { "epoch": 0.9106206896551724, "grad_norm": 3.742382287979126, "learning_rate": 9.530531696097163e-06, "loss": 1.2006, "step": 3301 }, { "epoch": 0.910896551724138, "grad_norm": 3.767286539077759, "learning_rate": 9.530145672616056e-06, "loss": 1.3215, "step": 3302 }, { "epoch": 0.9111724137931034, "grad_norm": 3.6806445121765137, "learning_rate": 9.529759498319267e-06, "loss": 1.3463, "step": 3303 }, { "epoch": 0.911448275862069, "grad_norm": 3.2295303344726562, "learning_rate": 9.52937317321965e-06, "loss": 1.0249, "step": 3304 }, { "epoch": 0.9117241379310345, "grad_norm": 3.6569387912750244, "learning_rate": 9.528986697330068e-06, "loss": 1.289, "step": 3305 }, { "epoch": 0.912, "grad_norm": 3.4238028526306152, "learning_rate": 9.52860007066339e-06, "loss": 1.2172, "step": 3306 }, { "epoch": 0.9122758620689655, "grad_norm": 3.4515888690948486, "learning_rate": 9.528213293232483e-06, "loss": 1.3469, "step": 3307 }, { "epoch": 0.9125517241379311, "grad_norm": 3.3795971870422363, "learning_rate": 9.527826365050226e-06, "loss": 1.3286, "step": 3308 }, { "epoch": 0.9128275862068965, "grad_norm": 3.477057933807373, "learning_rate": 9.527439286129498e-06, "loss": 1.2711, "step": 3309 }, { "epoch": 0.9131034482758621, "grad_norm": 3.4935476779937744, "learning_rate": 9.527052056483189e-06, "loss": 1.3385, "step": 3310 }, { "epoch": 0.9133793103448276, "grad_norm": 3.3817391395568848, "learning_rate": 9.526664676124187e-06, "loss": 1.2845, "step": 3311 }, { "epoch": 0.9136551724137931, "grad_norm": 3.4680447578430176, "learning_rate": 9.52627714506539e-06, "loss": 1.1931, "step": 3312 }, { "epoch": 0.9139310344827586, "grad_norm": 3.7710587978363037, "learning_rate": 9.5258894633197e-06, "loss": 1.4516, "step": 3313 }, { "epoch": 0.9142068965517242, "grad_norm": 3.3039517402648926, "learning_rate": 9.525501630900022e-06, "loss": 1.2605, "step": 3314 }, { "epoch": 0.9144827586206896, "grad_norm": 3.4023194313049316, "learning_rate": 9.52511364781927e-06, "loss": 1.1822, "step": 3315 }, { "epoch": 0.9147586206896552, "grad_norm": 3.9652934074401855, "learning_rate": 9.524725514090358e-06, "loss": 1.3692, "step": 3316 }, { "epoch": 0.9150344827586206, "grad_norm": 3.539841413497925, "learning_rate": 9.52433722972621e-06, "loss": 1.2114, "step": 3317 }, { "epoch": 0.9153103448275862, "grad_norm": 3.561629056930542, "learning_rate": 9.52394879473975e-06, "loss": 1.0501, "step": 3318 }, { "epoch": 0.9155862068965517, "grad_norm": 3.4938480854034424, "learning_rate": 9.523560209143914e-06, "loss": 1.2187, "step": 3319 }, { "epoch": 0.9158620689655173, "grad_norm": 3.818613052368164, "learning_rate": 9.523171472951633e-06, "loss": 1.2487, "step": 3320 }, { "epoch": 0.9161379310344827, "grad_norm": 3.6208553314208984, "learning_rate": 9.522782586175852e-06, "loss": 1.3034, "step": 3321 }, { "epoch": 0.9164137931034483, "grad_norm": 3.5639936923980713, "learning_rate": 9.522393548829516e-06, "loss": 1.1731, "step": 3322 }, { "epoch": 0.9166896551724137, "grad_norm": 3.8766627311706543, "learning_rate": 9.52200436092558e-06, "loss": 1.3146, "step": 3323 }, { "epoch": 0.9169655172413793, "grad_norm": 3.2348320484161377, "learning_rate": 9.521615022476995e-06, "loss": 1.2928, "step": 3324 }, { "epoch": 0.9172413793103448, "grad_norm": 3.7564070224761963, "learning_rate": 9.521225533496727e-06, "loss": 1.2279, "step": 3325 }, { "epoch": 0.9175172413793103, "grad_norm": 3.9637703895568848, "learning_rate": 9.520835893997742e-06, "loss": 1.3085, "step": 3326 }, { "epoch": 0.9177931034482759, "grad_norm": 3.6010568141937256, "learning_rate": 9.520446103993014e-06, "loss": 1.1987, "step": 3327 }, { "epoch": 0.9180689655172414, "grad_norm": 3.9172518253326416, "learning_rate": 9.520056163495513e-06, "loss": 1.1681, "step": 3328 }, { "epoch": 0.918344827586207, "grad_norm": 3.5027129650115967, "learning_rate": 9.519666072518226e-06, "loss": 1.2322, "step": 3329 }, { "epoch": 0.9186206896551724, "grad_norm": 3.4788081645965576, "learning_rate": 9.519275831074139e-06, "loss": 1.2611, "step": 3330 }, { "epoch": 0.918896551724138, "grad_norm": 4.064442157745361, "learning_rate": 9.518885439176245e-06, "loss": 1.3491, "step": 3331 }, { "epoch": 0.9191724137931034, "grad_norm": 3.6261701583862305, "learning_rate": 9.518494896837535e-06, "loss": 1.2142, "step": 3332 }, { "epoch": 0.919448275862069, "grad_norm": 3.887685775756836, "learning_rate": 9.518104204071019e-06, "loss": 1.2536, "step": 3333 }, { "epoch": 0.9197241379310345, "grad_norm": 3.5417449474334717, "learning_rate": 9.517713360889696e-06, "loss": 1.1875, "step": 3334 }, { "epoch": 0.92, "grad_norm": 3.568927049636841, "learning_rate": 9.517322367306584e-06, "loss": 1.3023, "step": 3335 }, { "epoch": 0.9202758620689655, "grad_norm": 3.5786118507385254, "learning_rate": 9.516931223334696e-06, "loss": 1.2032, "step": 3336 }, { "epoch": 0.9205517241379311, "grad_norm": 3.667506456375122, "learning_rate": 9.516539928987057e-06, "loss": 1.231, "step": 3337 }, { "epoch": 0.9208275862068965, "grad_norm": 3.378565788269043, "learning_rate": 9.516148484276688e-06, "loss": 1.3803, "step": 3338 }, { "epoch": 0.9211034482758621, "grad_norm": 3.588503837585449, "learning_rate": 9.515756889216627e-06, "loss": 1.1888, "step": 3339 }, { "epoch": 0.9213793103448276, "grad_norm": 3.536032199859619, "learning_rate": 9.515365143819909e-06, "loss": 1.2076, "step": 3340 }, { "epoch": 0.9216551724137931, "grad_norm": 3.513190984725952, "learning_rate": 9.514973248099575e-06, "loss": 1.2429, "step": 3341 }, { "epoch": 0.9219310344827586, "grad_norm": 3.4865922927856445, "learning_rate": 9.514581202068671e-06, "loss": 1.2226, "step": 3342 }, { "epoch": 0.9222068965517242, "grad_norm": 3.342750310897827, "learning_rate": 9.51418900574025e-06, "loss": 1.1628, "step": 3343 }, { "epoch": 0.9224827586206896, "grad_norm": 3.6579411029815674, "learning_rate": 9.51379665912737e-06, "loss": 1.2831, "step": 3344 }, { "epoch": 0.9227586206896552, "grad_norm": 3.84033465385437, "learning_rate": 9.51340416224309e-06, "loss": 1.3142, "step": 3345 }, { "epoch": 0.9230344827586207, "grad_norm": 3.282477617263794, "learning_rate": 9.513011515100478e-06, "loss": 1.194, "step": 3346 }, { "epoch": 0.9233103448275862, "grad_norm": 3.5173866748809814, "learning_rate": 9.512618717712609e-06, "loss": 1.2696, "step": 3347 }, { "epoch": 0.9235862068965517, "grad_norm": 4.152708530426025, "learning_rate": 9.512225770092556e-06, "loss": 1.1986, "step": 3348 }, { "epoch": 0.9238620689655173, "grad_norm": 3.4464643001556396, "learning_rate": 9.511832672253401e-06, "loss": 1.232, "step": 3349 }, { "epoch": 0.9241379310344827, "grad_norm": 3.473694324493408, "learning_rate": 9.511439424208232e-06, "loss": 1.2745, "step": 3350 }, { "epoch": 0.9244137931034483, "grad_norm": 3.6259853839874268, "learning_rate": 9.51104602597014e-06, "loss": 1.2126, "step": 3351 }, { "epoch": 0.9246896551724137, "grad_norm": 3.315905809402466, "learning_rate": 9.510652477552225e-06, "loss": 1.1398, "step": 3352 }, { "epoch": 0.9249655172413793, "grad_norm": 3.509671211242676, "learning_rate": 9.510258778967583e-06, "loss": 1.1748, "step": 3353 }, { "epoch": 0.9252413793103448, "grad_norm": 3.726743221282959, "learning_rate": 9.509864930229326e-06, "loss": 1.1289, "step": 3354 }, { "epoch": 0.9255172413793104, "grad_norm": 3.5206029415130615, "learning_rate": 9.509470931350565e-06, "loss": 1.1734, "step": 3355 }, { "epoch": 0.9257931034482758, "grad_norm": 3.7925117015838623, "learning_rate": 9.509076782344415e-06, "loss": 1.3279, "step": 3356 }, { "epoch": 0.9260689655172414, "grad_norm": 3.5740559101104736, "learning_rate": 9.508682483223997e-06, "loss": 1.2061, "step": 3357 }, { "epoch": 0.9263448275862068, "grad_norm": 3.2203423976898193, "learning_rate": 9.508288034002443e-06, "loss": 1.1169, "step": 3358 }, { "epoch": 0.9266206896551724, "grad_norm": 3.43625807762146, "learning_rate": 9.507893434692879e-06, "loss": 1.1598, "step": 3359 }, { "epoch": 0.926896551724138, "grad_norm": 3.191890239715576, "learning_rate": 9.507498685308443e-06, "loss": 1.2005, "step": 3360 }, { "epoch": 0.9271724137931034, "grad_norm": 3.5012378692626953, "learning_rate": 9.50710378586228e-06, "loss": 1.2206, "step": 3361 }, { "epoch": 0.927448275862069, "grad_norm": 3.0586841106414795, "learning_rate": 9.506708736367534e-06, "loss": 1.0694, "step": 3362 }, { "epoch": 0.9277241379310345, "grad_norm": 3.568995952606201, "learning_rate": 9.506313536837359e-06, "loss": 1.2115, "step": 3363 }, { "epoch": 0.928, "grad_norm": 3.2929015159606934, "learning_rate": 9.50591818728491e-06, "loss": 1.2116, "step": 3364 }, { "epoch": 0.9282758620689655, "grad_norm": 3.7912490367889404, "learning_rate": 9.505522687723347e-06, "loss": 1.2134, "step": 3365 }, { "epoch": 0.9285517241379311, "grad_norm": 3.7560741901397705, "learning_rate": 9.505127038165843e-06, "loss": 1.3458, "step": 3366 }, { "epoch": 0.9288275862068965, "grad_norm": 3.946617364883423, "learning_rate": 9.504731238625563e-06, "loss": 1.315, "step": 3367 }, { "epoch": 0.9291034482758621, "grad_norm": 3.8524794578552246, "learning_rate": 9.504335289115686e-06, "loss": 1.4905, "step": 3368 }, { "epoch": 0.9293793103448276, "grad_norm": 3.505997896194458, "learning_rate": 9.503939189649398e-06, "loss": 1.1963, "step": 3369 }, { "epoch": 0.9296551724137931, "grad_norm": 3.3819613456726074, "learning_rate": 9.50354294023988e-06, "loss": 1.2586, "step": 3370 }, { "epoch": 0.9299310344827586, "grad_norm": 3.8025317192077637, "learning_rate": 9.503146540900327e-06, "loss": 1.2366, "step": 3371 }, { "epoch": 0.9302068965517242, "grad_norm": 3.853518486022949, "learning_rate": 9.502749991643933e-06, "loss": 1.3123, "step": 3372 }, { "epoch": 0.9304827586206896, "grad_norm": 3.478586435317993, "learning_rate": 9.502353292483904e-06, "loss": 1.1779, "step": 3373 }, { "epoch": 0.9307586206896552, "grad_norm": 3.2229602336883545, "learning_rate": 9.501956443433441e-06, "loss": 1.0955, "step": 3374 }, { "epoch": 0.9310344827586207, "grad_norm": 3.5736241340637207, "learning_rate": 9.501559444505762e-06, "loss": 1.2373, "step": 3375 }, { "epoch": 0.9313103448275862, "grad_norm": 3.8395535945892334, "learning_rate": 9.50116229571408e-06, "loss": 1.2473, "step": 3376 }, { "epoch": 0.9315862068965517, "grad_norm": 3.8263370990753174, "learning_rate": 9.500764997071617e-06, "loss": 1.0984, "step": 3377 }, { "epoch": 0.9318620689655173, "grad_norm": 3.47918701171875, "learning_rate": 9.500367548591601e-06, "loss": 1.1545, "step": 3378 }, { "epoch": 0.9321379310344827, "grad_norm": 3.5056564807891846, "learning_rate": 9.499969950287262e-06, "loss": 1.2908, "step": 3379 }, { "epoch": 0.9324137931034483, "grad_norm": 3.572584390640259, "learning_rate": 9.499572202171836e-06, "loss": 1.1874, "step": 3380 }, { "epoch": 0.9326896551724138, "grad_norm": 3.3086278438568115, "learning_rate": 9.499174304258569e-06, "loss": 1.0964, "step": 3381 }, { "epoch": 0.9329655172413793, "grad_norm": 3.3686294555664062, "learning_rate": 9.498776256560702e-06, "loss": 1.1869, "step": 3382 }, { "epoch": 0.9332413793103448, "grad_norm": 3.3300092220306396, "learning_rate": 9.498378059091492e-06, "loss": 1.155, "step": 3383 }, { "epoch": 0.9335172413793104, "grad_norm": 3.3537564277648926, "learning_rate": 9.49797971186419e-06, "loss": 1.3127, "step": 3384 }, { "epoch": 0.9337931034482758, "grad_norm": 3.5780484676361084, "learning_rate": 9.497581214892064e-06, "loss": 1.1766, "step": 3385 }, { "epoch": 0.9340689655172414, "grad_norm": 3.597416639328003, "learning_rate": 9.497182568188376e-06, "loss": 1.3043, "step": 3386 }, { "epoch": 0.9343448275862068, "grad_norm": 3.7291901111602783, "learning_rate": 9.496783771766398e-06, "loss": 1.2043, "step": 3387 }, { "epoch": 0.9346206896551724, "grad_norm": 3.5500376224517822, "learning_rate": 9.496384825639408e-06, "loss": 1.088, "step": 3388 }, { "epoch": 0.9348965517241379, "grad_norm": 3.487741231918335, "learning_rate": 9.495985729820686e-06, "loss": 1.2456, "step": 3389 }, { "epoch": 0.9351724137931035, "grad_norm": 3.499504327774048, "learning_rate": 9.495586484323522e-06, "loss": 1.1522, "step": 3390 }, { "epoch": 0.9354482758620689, "grad_norm": 4.066355228424072, "learning_rate": 9.495187089161203e-06, "loss": 1.2191, "step": 3391 }, { "epoch": 0.9357241379310345, "grad_norm": 3.9205806255340576, "learning_rate": 9.494787544347028e-06, "loss": 1.1284, "step": 3392 }, { "epoch": 0.936, "grad_norm": 3.055371046066284, "learning_rate": 9.494387849894297e-06, "loss": 1.0348, "step": 3393 }, { "epoch": 0.9362758620689655, "grad_norm": 3.5206425189971924, "learning_rate": 9.49398800581632e-06, "loss": 1.1295, "step": 3394 }, { "epoch": 0.9365517241379311, "grad_norm": 3.240097999572754, "learning_rate": 9.493588012126402e-06, "loss": 1.066, "step": 3395 }, { "epoch": 0.9368275862068965, "grad_norm": 4.1352715492248535, "learning_rate": 9.493187868837866e-06, "loss": 1.2731, "step": 3396 }, { "epoch": 0.9371034482758621, "grad_norm": 3.5494067668914795, "learning_rate": 9.49278757596403e-06, "loss": 1.3204, "step": 3397 }, { "epoch": 0.9373793103448276, "grad_norm": 3.251917839050293, "learning_rate": 9.492387133518222e-06, "loss": 1.1655, "step": 3398 }, { "epoch": 0.9376551724137931, "grad_norm": 4.09898042678833, "learning_rate": 9.49198654151377e-06, "loss": 1.292, "step": 3399 }, { "epoch": 0.9379310344827586, "grad_norm": 3.540454387664795, "learning_rate": 9.491585799964014e-06, "loss": 1.3362, "step": 3400 }, { "epoch": 0.9382068965517242, "grad_norm": 3.8736984729766846, "learning_rate": 9.491184908882293e-06, "loss": 1.1542, "step": 3401 }, { "epoch": 0.9384827586206896, "grad_norm": 3.3323354721069336, "learning_rate": 9.490783868281957e-06, "loss": 1.1636, "step": 3402 }, { "epoch": 0.9387586206896552, "grad_norm": 3.201117753982544, "learning_rate": 9.490382678176354e-06, "loss": 1.2613, "step": 3403 }, { "epoch": 0.9390344827586207, "grad_norm": 4.120163440704346, "learning_rate": 9.489981338578839e-06, "loss": 1.3882, "step": 3404 }, { "epoch": 0.9393103448275862, "grad_norm": 3.3748340606689453, "learning_rate": 9.489579849502776e-06, "loss": 1.3813, "step": 3405 }, { "epoch": 0.9395862068965517, "grad_norm": 3.8986823558807373, "learning_rate": 9.48917821096153e-06, "loss": 1.1271, "step": 3406 }, { "epoch": 0.9398620689655173, "grad_norm": 3.3159306049346924, "learning_rate": 9.488776422968473e-06, "loss": 1.1608, "step": 3407 }, { "epoch": 0.9401379310344827, "grad_norm": 3.473314046859741, "learning_rate": 9.48837448553698e-06, "loss": 1.2138, "step": 3408 }, { "epoch": 0.9404137931034483, "grad_norm": 3.698357343673706, "learning_rate": 9.487972398680434e-06, "loss": 1.227, "step": 3409 }, { "epoch": 0.9406896551724138, "grad_norm": 3.481801986694336, "learning_rate": 9.487570162412218e-06, "loss": 1.2671, "step": 3410 }, { "epoch": 0.9409655172413793, "grad_norm": 3.338684558868408, "learning_rate": 9.487167776745726e-06, "loss": 1.2027, "step": 3411 }, { "epoch": 0.9412413793103448, "grad_norm": 3.487919330596924, "learning_rate": 9.486765241694353e-06, "loss": 1.2157, "step": 3412 }, { "epoch": 0.9415172413793104, "grad_norm": 3.534520387649536, "learning_rate": 9.4863625572715e-06, "loss": 1.1374, "step": 3413 }, { "epoch": 0.9417931034482758, "grad_norm": 3.642444133758545, "learning_rate": 9.485959723490573e-06, "loss": 1.2198, "step": 3414 }, { "epoch": 0.9420689655172414, "grad_norm": 3.5072379112243652, "learning_rate": 9.485556740364983e-06, "loss": 1.2016, "step": 3415 }, { "epoch": 0.9423448275862069, "grad_norm": 3.4812307357788086, "learning_rate": 9.485153607908147e-06, "loss": 1.3031, "step": 3416 }, { "epoch": 0.9426206896551724, "grad_norm": 3.2485828399658203, "learning_rate": 9.484750326133483e-06, "loss": 1.1801, "step": 3417 }, { "epoch": 0.9428965517241379, "grad_norm": 3.604590654373169, "learning_rate": 9.484346895054419e-06, "loss": 1.3019, "step": 3418 }, { "epoch": 0.9431724137931035, "grad_norm": 3.3564982414245605, "learning_rate": 9.483943314684387e-06, "loss": 1.3688, "step": 3419 }, { "epoch": 0.9434482758620689, "grad_norm": 3.583498477935791, "learning_rate": 9.483539585036819e-06, "loss": 1.2911, "step": 3420 }, { "epoch": 0.9437241379310345, "grad_norm": 3.2844698429107666, "learning_rate": 9.48313570612516e-06, "loss": 1.2557, "step": 3421 }, { "epoch": 0.944, "grad_norm": 3.6082308292388916, "learning_rate": 9.482731677962855e-06, "loss": 1.2623, "step": 3422 }, { "epoch": 0.9442758620689655, "grad_norm": 3.6027450561523438, "learning_rate": 9.482327500563352e-06, "loss": 1.1226, "step": 3423 }, { "epoch": 0.944551724137931, "grad_norm": 3.36574125289917, "learning_rate": 9.481923173940109e-06, "loss": 1.1836, "step": 3424 }, { "epoch": 0.9448275862068966, "grad_norm": 3.040607452392578, "learning_rate": 9.481518698106586e-06, "loss": 1.1773, "step": 3425 }, { "epoch": 0.9451034482758621, "grad_norm": 3.5807344913482666, "learning_rate": 9.481114073076248e-06, "loss": 1.3322, "step": 3426 }, { "epoch": 0.9453793103448276, "grad_norm": 3.042515754699707, "learning_rate": 9.480709298862567e-06, "loss": 1.2041, "step": 3427 }, { "epoch": 0.9456551724137932, "grad_norm": 3.545771598815918, "learning_rate": 9.480304375479018e-06, "loss": 1.0807, "step": 3428 }, { "epoch": 0.9459310344827586, "grad_norm": 3.5390193462371826, "learning_rate": 9.479899302939082e-06, "loss": 1.4386, "step": 3429 }, { "epoch": 0.9462068965517242, "grad_norm": 3.9690260887145996, "learning_rate": 9.479494081256242e-06, "loss": 1.2353, "step": 3430 }, { "epoch": 0.9464827586206896, "grad_norm": 3.7808403968811035, "learning_rate": 9.479088710443991e-06, "loss": 1.2382, "step": 3431 }, { "epoch": 0.9467586206896552, "grad_norm": 3.9080679416656494, "learning_rate": 9.478683190515824e-06, "loss": 1.3451, "step": 3432 }, { "epoch": 0.9470344827586207, "grad_norm": 3.5145535469055176, "learning_rate": 9.478277521485244e-06, "loss": 1.339, "step": 3433 }, { "epoch": 0.9473103448275862, "grad_norm": 3.395549774169922, "learning_rate": 9.477871703365751e-06, "loss": 1.3016, "step": 3434 }, { "epoch": 0.9475862068965517, "grad_norm": 3.5683188438415527, "learning_rate": 9.477465736170857e-06, "loss": 1.2559, "step": 3435 }, { "epoch": 0.9478620689655173, "grad_norm": 3.6071155071258545, "learning_rate": 9.47705961991408e-06, "loss": 1.3537, "step": 3436 }, { "epoch": 0.9481379310344827, "grad_norm": 3.6055331230163574, "learning_rate": 9.476653354608937e-06, "loss": 1.2039, "step": 3437 }, { "epoch": 0.9484137931034483, "grad_norm": 3.5202877521514893, "learning_rate": 9.476246940268955e-06, "loss": 1.1629, "step": 3438 }, { "epoch": 0.9486896551724138, "grad_norm": 3.540191411972046, "learning_rate": 9.475840376907665e-06, "loss": 1.2241, "step": 3439 }, { "epoch": 0.9489655172413793, "grad_norm": 3.5013465881347656, "learning_rate": 9.4754336645386e-06, "loss": 1.215, "step": 3440 }, { "epoch": 0.9492413793103448, "grad_norm": 3.469759225845337, "learning_rate": 9.475026803175302e-06, "loss": 1.1626, "step": 3441 }, { "epoch": 0.9495172413793104, "grad_norm": 3.17356014251709, "learning_rate": 9.474619792831315e-06, "loss": 1.3289, "step": 3442 }, { "epoch": 0.9497931034482758, "grad_norm": 3.6715493202209473, "learning_rate": 9.47421263352019e-06, "loss": 1.1702, "step": 3443 }, { "epoch": 0.9500689655172414, "grad_norm": 3.3697917461395264, "learning_rate": 9.473805325255479e-06, "loss": 1.1622, "step": 3444 }, { "epoch": 0.9503448275862069, "grad_norm": 3.4436872005462646, "learning_rate": 9.473397868050745e-06, "loss": 1.3441, "step": 3445 }, { "epoch": 0.9506206896551724, "grad_norm": 3.5681867599487305, "learning_rate": 9.472990261919552e-06, "loss": 1.3703, "step": 3446 }, { "epoch": 0.9508965517241379, "grad_norm": 3.531599998474121, "learning_rate": 9.47258250687547e-06, "loss": 1.2682, "step": 3447 }, { "epoch": 0.9511724137931035, "grad_norm": 3.266024589538574, "learning_rate": 9.472174602932071e-06, "loss": 1.1615, "step": 3448 }, { "epoch": 0.9514482758620689, "grad_norm": 3.694646120071411, "learning_rate": 9.47176655010294e-06, "loss": 1.1281, "step": 3449 }, { "epoch": 0.9517241379310345, "grad_norm": 3.6283867359161377, "learning_rate": 9.471358348401659e-06, "loss": 1.2037, "step": 3450 }, { "epoch": 0.952, "grad_norm": 3.2477259635925293, "learning_rate": 9.470949997841817e-06, "loss": 1.1906, "step": 3451 }, { "epoch": 0.9522758620689655, "grad_norm": 3.4408860206604004, "learning_rate": 9.47054149843701e-06, "loss": 1.1653, "step": 3452 }, { "epoch": 0.952551724137931, "grad_norm": 3.83548903465271, "learning_rate": 9.470132850200837e-06, "loss": 1.2722, "step": 3453 }, { "epoch": 0.9528275862068966, "grad_norm": 3.6603870391845703, "learning_rate": 9.469724053146902e-06, "loss": 1.3879, "step": 3454 }, { "epoch": 0.953103448275862, "grad_norm": 4.180979251861572, "learning_rate": 9.469315107288815e-06, "loss": 1.2709, "step": 3455 }, { "epoch": 0.9533793103448276, "grad_norm": 3.5449063777923584, "learning_rate": 9.468906012640191e-06, "loss": 1.2295, "step": 3456 }, { "epoch": 0.953655172413793, "grad_norm": 3.087428331375122, "learning_rate": 9.46849676921465e-06, "loss": 1.0676, "step": 3457 }, { "epoch": 0.9539310344827586, "grad_norm": 3.112003803253174, "learning_rate": 9.468087377025813e-06, "loss": 1.1492, "step": 3458 }, { "epoch": 0.9542068965517242, "grad_norm": 3.5995404720306396, "learning_rate": 9.467677836087313e-06, "loss": 1.1422, "step": 3459 }, { "epoch": 0.9544827586206897, "grad_norm": 3.5162417888641357, "learning_rate": 9.467268146412783e-06, "loss": 1.3246, "step": 3460 }, { "epoch": 0.9547586206896552, "grad_norm": 3.4346017837524414, "learning_rate": 9.46685830801586e-06, "loss": 1.2474, "step": 3461 }, { "epoch": 0.9550344827586207, "grad_norm": 3.2554805278778076, "learning_rate": 9.466448320910192e-06, "loss": 1.1318, "step": 3462 }, { "epoch": 0.9553103448275863, "grad_norm": 3.670562744140625, "learning_rate": 9.466038185109424e-06, "loss": 1.3259, "step": 3463 }, { "epoch": 0.9555862068965517, "grad_norm": 3.494460105895996, "learning_rate": 9.465627900627216e-06, "loss": 1.2406, "step": 3464 }, { "epoch": 0.9558620689655173, "grad_norm": 3.3145344257354736, "learning_rate": 9.465217467477221e-06, "loss": 1.2302, "step": 3465 }, { "epoch": 0.9561379310344827, "grad_norm": 3.8280863761901855, "learning_rate": 9.464806885673106e-06, "loss": 1.3266, "step": 3466 }, { "epoch": 0.9564137931034483, "grad_norm": 3.6968624591827393, "learning_rate": 9.46439615522854e-06, "loss": 1.278, "step": 3467 }, { "epoch": 0.9566896551724138, "grad_norm": 3.525697946548462, "learning_rate": 9.463985276157195e-06, "loss": 1.1784, "step": 3468 }, { "epoch": 0.9569655172413793, "grad_norm": 3.5208945274353027, "learning_rate": 9.46357424847275e-06, "loss": 1.2706, "step": 3469 }, { "epoch": 0.9572413793103448, "grad_norm": 3.070911169052124, "learning_rate": 9.463163072188891e-06, "loss": 1.0999, "step": 3470 }, { "epoch": 0.9575172413793104, "grad_norm": 3.703552007675171, "learning_rate": 9.462751747319305e-06, "loss": 1.2434, "step": 3471 }, { "epoch": 0.9577931034482758, "grad_norm": 3.8232662677764893, "learning_rate": 9.462340273877687e-06, "loss": 1.3092, "step": 3472 }, { "epoch": 0.9580689655172414, "grad_norm": 3.57548451423645, "learning_rate": 9.461928651877733e-06, "loss": 1.1072, "step": 3473 }, { "epoch": 0.9583448275862069, "grad_norm": 3.864710807800293, "learning_rate": 9.46151688133315e-06, "loss": 1.3491, "step": 3474 }, { "epoch": 0.9586206896551724, "grad_norm": 3.537391424179077, "learning_rate": 9.461104962257645e-06, "loss": 1.2943, "step": 3475 }, { "epoch": 0.9588965517241379, "grad_norm": 3.6635613441467285, "learning_rate": 9.460692894664929e-06, "loss": 1.2367, "step": 3476 }, { "epoch": 0.9591724137931035, "grad_norm": 3.8849120140075684, "learning_rate": 9.460280678568724e-06, "loss": 1.3215, "step": 3477 }, { "epoch": 0.9594482758620689, "grad_norm": 3.575892925262451, "learning_rate": 9.459868313982752e-06, "loss": 1.3039, "step": 3478 }, { "epoch": 0.9597241379310345, "grad_norm": 3.6118175983428955, "learning_rate": 9.45945580092074e-06, "loss": 1.3111, "step": 3479 }, { "epoch": 0.96, "grad_norm": 3.3316550254821777, "learning_rate": 9.459043139396424e-06, "loss": 1.2137, "step": 3480 }, { "epoch": 0.9602758620689655, "grad_norm": 3.250851631164551, "learning_rate": 9.458630329423539e-06, "loss": 1.1863, "step": 3481 }, { "epoch": 0.960551724137931, "grad_norm": 3.5795419216156006, "learning_rate": 9.45821737101583e-06, "loss": 1.2691, "step": 3482 }, { "epoch": 0.9608275862068966, "grad_norm": 3.5897202491760254, "learning_rate": 9.457804264187044e-06, "loss": 1.2211, "step": 3483 }, { "epoch": 0.961103448275862, "grad_norm": 3.061931610107422, "learning_rate": 9.457391008950935e-06, "loss": 1.1716, "step": 3484 }, { "epoch": 0.9613793103448276, "grad_norm": 3.524273633956909, "learning_rate": 9.45697760532126e-06, "loss": 1.1974, "step": 3485 }, { "epoch": 0.9616551724137931, "grad_norm": 3.456878662109375, "learning_rate": 9.456564053311783e-06, "loss": 1.2691, "step": 3486 }, { "epoch": 0.9619310344827586, "grad_norm": 3.40177845954895, "learning_rate": 9.45615035293627e-06, "loss": 1.2589, "step": 3487 }, { "epoch": 0.9622068965517241, "grad_norm": 3.49916934967041, "learning_rate": 9.455736504208497e-06, "loss": 1.1894, "step": 3488 }, { "epoch": 0.9624827586206897, "grad_norm": 3.365966558456421, "learning_rate": 9.45532250714224e-06, "loss": 1.3288, "step": 3489 }, { "epoch": 0.9627586206896551, "grad_norm": 3.394254446029663, "learning_rate": 9.454908361751278e-06, "loss": 1.291, "step": 3490 }, { "epoch": 0.9630344827586207, "grad_norm": 3.341332197189331, "learning_rate": 9.454494068049405e-06, "loss": 1.239, "step": 3491 }, { "epoch": 0.9633103448275863, "grad_norm": 3.7733471393585205, "learning_rate": 9.454079626050407e-06, "loss": 1.148, "step": 3492 }, { "epoch": 0.9635862068965517, "grad_norm": 3.4626755714416504, "learning_rate": 9.453665035768087e-06, "loss": 1.1325, "step": 3493 }, { "epoch": 0.9638620689655173, "grad_norm": 3.6929633617401123, "learning_rate": 9.453250297216243e-06, "loss": 1.2809, "step": 3494 }, { "epoch": 0.9641379310344828, "grad_norm": 3.3113362789154053, "learning_rate": 9.452835410408687e-06, "loss": 1.1698, "step": 3495 }, { "epoch": 0.9644137931034483, "grad_norm": 3.830772876739502, "learning_rate": 9.452420375359226e-06, "loss": 1.2246, "step": 3496 }, { "epoch": 0.9646896551724138, "grad_norm": 4.121431350708008, "learning_rate": 9.45200519208168e-06, "loss": 1.3999, "step": 3497 }, { "epoch": 0.9649655172413794, "grad_norm": 3.371718406677246, "learning_rate": 9.451589860589873e-06, "loss": 1.2024, "step": 3498 }, { "epoch": 0.9652413793103448, "grad_norm": 3.84602689743042, "learning_rate": 9.451174380897627e-06, "loss": 1.4141, "step": 3499 }, { "epoch": 0.9655172413793104, "grad_norm": 3.682112455368042, "learning_rate": 9.450758753018778e-06, "loss": 1.2986, "step": 3500 }, { "epoch": 0.9655172413793104, "eval_loss": 1.257588505744934, "eval_runtime": 11.6728, "eval_samples_per_second": 34.268, "eval_steps_per_second": 4.283, "step": 3500 }, { "epoch": 0.9657931034482758, "grad_norm": 3.2580525875091553, "learning_rate": 9.450342976967163e-06, "loss": 1.2213, "step": 3501 }, { "epoch": 0.9660689655172414, "grad_norm": 3.477095127105713, "learning_rate": 9.449927052756622e-06, "loss": 1.3804, "step": 3502 }, { "epoch": 0.9663448275862069, "grad_norm": 3.756767511367798, "learning_rate": 9.449510980401003e-06, "loss": 1.3658, "step": 3503 }, { "epoch": 0.9666206896551724, "grad_norm": 3.520699977874756, "learning_rate": 9.449094759914154e-06, "loss": 1.2346, "step": 3504 }, { "epoch": 0.9668965517241379, "grad_norm": 3.5011041164398193, "learning_rate": 9.448678391309936e-06, "loss": 1.3404, "step": 3505 }, { "epoch": 0.9671724137931035, "grad_norm": 4.115669250488281, "learning_rate": 9.44826187460221e-06, "loss": 1.2738, "step": 3506 }, { "epoch": 0.9674482758620689, "grad_norm": 3.5955405235290527, "learning_rate": 9.447845209804843e-06, "loss": 1.3332, "step": 3507 }, { "epoch": 0.9677241379310345, "grad_norm": 3.2902419567108154, "learning_rate": 9.447428396931705e-06, "loss": 1.1657, "step": 3508 }, { "epoch": 0.968, "grad_norm": 3.4900941848754883, "learning_rate": 9.447011435996669e-06, "loss": 1.204, "step": 3509 }, { "epoch": 0.9682758620689655, "grad_norm": 3.5338659286499023, "learning_rate": 9.446594327013623e-06, "loss": 1.2371, "step": 3510 }, { "epoch": 0.968551724137931, "grad_norm": 3.469149351119995, "learning_rate": 9.446177069996448e-06, "loss": 1.2594, "step": 3511 }, { "epoch": 0.9688275862068966, "grad_norm": 3.3106517791748047, "learning_rate": 9.445759664959039e-06, "loss": 1.1835, "step": 3512 }, { "epoch": 0.969103448275862, "grad_norm": 3.739004135131836, "learning_rate": 9.44534211191529e-06, "loss": 1.2389, "step": 3513 }, { "epoch": 0.9693793103448276, "grad_norm": 3.5484859943389893, "learning_rate": 9.444924410879102e-06, "loss": 1.0811, "step": 3514 }, { "epoch": 0.9696551724137931, "grad_norm": 3.6754393577575684, "learning_rate": 9.44450656186438e-06, "loss": 1.2909, "step": 3515 }, { "epoch": 0.9699310344827586, "grad_norm": 3.6114296913146973, "learning_rate": 9.444088564885036e-06, "loss": 1.2296, "step": 3516 }, { "epoch": 0.9702068965517241, "grad_norm": 3.9799718856811523, "learning_rate": 9.443670419954987e-06, "loss": 1.3642, "step": 3517 }, { "epoch": 0.9704827586206897, "grad_norm": 3.6598222255706787, "learning_rate": 9.443252127088153e-06, "loss": 1.2105, "step": 3518 }, { "epoch": 0.9707586206896551, "grad_norm": 3.602984666824341, "learning_rate": 9.442833686298458e-06, "loss": 1.2844, "step": 3519 }, { "epoch": 0.9710344827586207, "grad_norm": 3.594802141189575, "learning_rate": 9.442415097599834e-06, "loss": 1.2517, "step": 3520 }, { "epoch": 0.9713103448275862, "grad_norm": 3.88102388381958, "learning_rate": 9.441996361006216e-06, "loss": 1.2785, "step": 3521 }, { "epoch": 0.9715862068965517, "grad_norm": 3.10420298576355, "learning_rate": 9.441577476531544e-06, "loss": 1.2097, "step": 3522 }, { "epoch": 0.9718620689655172, "grad_norm": 3.8217241764068604, "learning_rate": 9.441158444189765e-06, "loss": 1.2064, "step": 3523 }, { "epoch": 0.9721379310344828, "grad_norm": 3.415743827819824, "learning_rate": 9.440739263994827e-06, "loss": 1.3671, "step": 3524 }, { "epoch": 0.9724137931034482, "grad_norm": 4.003964900970459, "learning_rate": 9.440319935960687e-06, "loss": 1.3796, "step": 3525 }, { "epoch": 0.9726896551724138, "grad_norm": 3.554682970046997, "learning_rate": 9.439900460101305e-06, "loss": 1.3017, "step": 3526 }, { "epoch": 0.9729655172413794, "grad_norm": 3.584564208984375, "learning_rate": 9.439480836430646e-06, "loss": 1.2927, "step": 3527 }, { "epoch": 0.9732413793103448, "grad_norm": 3.7463388442993164, "learning_rate": 9.439061064962678e-06, "loss": 1.2164, "step": 3528 }, { "epoch": 0.9735172413793104, "grad_norm": 3.413714647293091, "learning_rate": 9.438641145711377e-06, "loss": 1.1329, "step": 3529 }, { "epoch": 0.9737931034482759, "grad_norm": 3.4354212284088135, "learning_rate": 9.438221078690724e-06, "loss": 1.1296, "step": 3530 }, { "epoch": 0.9740689655172414, "grad_norm": 3.2281322479248047, "learning_rate": 9.437800863914701e-06, "loss": 1.1347, "step": 3531 }, { "epoch": 0.9743448275862069, "grad_norm": 3.5444490909576416, "learning_rate": 9.4373805013973e-06, "loss": 1.3345, "step": 3532 }, { "epoch": 0.9746206896551725, "grad_norm": 3.433730125427246, "learning_rate": 9.436959991152514e-06, "loss": 1.0852, "step": 3533 }, { "epoch": 0.9748965517241379, "grad_norm": 3.4083709716796875, "learning_rate": 9.436539333194345e-06, "loss": 1.1482, "step": 3534 }, { "epoch": 0.9751724137931035, "grad_norm": 3.2559754848480225, "learning_rate": 9.436118527536795e-06, "loss": 1.1374, "step": 3535 }, { "epoch": 0.975448275862069, "grad_norm": 3.8881192207336426, "learning_rate": 9.435697574193874e-06, "loss": 1.3409, "step": 3536 }, { "epoch": 0.9757241379310345, "grad_norm": 3.354548931121826, "learning_rate": 9.435276473179596e-06, "loss": 1.0727, "step": 3537 }, { "epoch": 0.976, "grad_norm": 3.5475215911865234, "learning_rate": 9.434855224507979e-06, "loss": 1.1174, "step": 3538 }, { "epoch": 0.9762758620689655, "grad_norm": 3.5009281635284424, "learning_rate": 9.43443382819305e-06, "loss": 1.2098, "step": 3539 }, { "epoch": 0.976551724137931, "grad_norm": 3.567854642868042, "learning_rate": 9.434012284248835e-06, "loss": 1.2041, "step": 3540 }, { "epoch": 0.9768275862068966, "grad_norm": 3.403254508972168, "learning_rate": 9.43359059268937e-06, "loss": 1.0794, "step": 3541 }, { "epoch": 0.977103448275862, "grad_norm": 3.5326101779937744, "learning_rate": 9.433168753528693e-06, "loss": 1.2599, "step": 3542 }, { "epoch": 0.9773793103448276, "grad_norm": 3.6482722759246826, "learning_rate": 9.432746766780846e-06, "loss": 1.3446, "step": 3543 }, { "epoch": 0.9776551724137931, "grad_norm": 3.4494452476501465, "learning_rate": 9.43232463245988e-06, "loss": 1.2518, "step": 3544 }, { "epoch": 0.9779310344827586, "grad_norm": 4.778239727020264, "learning_rate": 9.431902350579846e-06, "loss": 1.2245, "step": 3545 }, { "epoch": 0.9782068965517241, "grad_norm": 3.819606065750122, "learning_rate": 9.431479921154807e-06, "loss": 1.214, "step": 3546 }, { "epoch": 0.9784827586206897, "grad_norm": 3.4298958778381348, "learning_rate": 9.431057344198822e-06, "loss": 1.1081, "step": 3547 }, { "epoch": 0.9787586206896551, "grad_norm": 3.689629077911377, "learning_rate": 9.430634619725961e-06, "loss": 1.3405, "step": 3548 }, { "epoch": 0.9790344827586207, "grad_norm": 3.774024724960327, "learning_rate": 9.430211747750296e-06, "loss": 1.3559, "step": 3549 }, { "epoch": 0.9793103448275862, "grad_norm": 3.567307710647583, "learning_rate": 9.429788728285907e-06, "loss": 1.3406, "step": 3550 }, { "epoch": 0.9795862068965517, "grad_norm": 3.3613362312316895, "learning_rate": 9.429365561346875e-06, "loss": 1.0839, "step": 3551 }, { "epoch": 0.9798620689655172, "grad_norm": 3.5956051349639893, "learning_rate": 9.428942246947288e-06, "loss": 1.2049, "step": 3552 }, { "epoch": 0.9801379310344828, "grad_norm": 3.3543379306793213, "learning_rate": 9.428518785101241e-06, "loss": 1.0533, "step": 3553 }, { "epoch": 0.9804137931034482, "grad_norm": 3.51643967628479, "learning_rate": 9.42809517582283e-06, "loss": 1.1364, "step": 3554 }, { "epoch": 0.9806896551724138, "grad_norm": 4.2104716300964355, "learning_rate": 9.427671419126158e-06, "loss": 1.333, "step": 3555 }, { "epoch": 0.9809655172413793, "grad_norm": 3.5080063343048096, "learning_rate": 9.427247515025331e-06, "loss": 1.147, "step": 3556 }, { "epoch": 0.9812413793103448, "grad_norm": 3.517406463623047, "learning_rate": 9.426823463534464e-06, "loss": 1.1209, "step": 3557 }, { "epoch": 0.9815172413793103, "grad_norm": 3.7435381412506104, "learning_rate": 9.426399264667675e-06, "loss": 1.1345, "step": 3558 }, { "epoch": 0.9817931034482759, "grad_norm": 4.323508262634277, "learning_rate": 9.425974918439083e-06, "loss": 1.3634, "step": 3559 }, { "epoch": 0.9820689655172414, "grad_norm": 4.060039043426514, "learning_rate": 9.425550424862815e-06, "loss": 1.2569, "step": 3560 }, { "epoch": 0.9823448275862069, "grad_norm": 3.634805917739868, "learning_rate": 9.425125783953007e-06, "loss": 1.2702, "step": 3561 }, { "epoch": 0.9826206896551725, "grad_norm": 3.5102081298828125, "learning_rate": 9.424700995723792e-06, "loss": 1.1811, "step": 3562 }, { "epoch": 0.9828965517241379, "grad_norm": 3.630795478820801, "learning_rate": 9.424276060189314e-06, "loss": 1.2393, "step": 3563 }, { "epoch": 0.9831724137931035, "grad_norm": 3.624297857284546, "learning_rate": 9.42385097736372e-06, "loss": 1.229, "step": 3564 }, { "epoch": 0.983448275862069, "grad_norm": 3.45267391204834, "learning_rate": 9.423425747261159e-06, "loss": 1.219, "step": 3565 }, { "epoch": 0.9837241379310345, "grad_norm": 3.1833200454711914, "learning_rate": 9.423000369895791e-06, "loss": 1.1678, "step": 3566 }, { "epoch": 0.984, "grad_norm": 3.2623322010040283, "learning_rate": 9.422574845281775e-06, "loss": 1.2543, "step": 3567 }, { "epoch": 0.9842758620689656, "grad_norm": 3.5408153533935547, "learning_rate": 9.42214917343328e-06, "loss": 1.2311, "step": 3568 }, { "epoch": 0.984551724137931, "grad_norm": 3.5114707946777344, "learning_rate": 9.421723354364476e-06, "loss": 1.316, "step": 3569 }, { "epoch": 0.9848275862068966, "grad_norm": 3.5722532272338867, "learning_rate": 9.421297388089538e-06, "loss": 1.2963, "step": 3570 }, { "epoch": 0.985103448275862, "grad_norm": 3.4424164295196533, "learning_rate": 9.420871274622648e-06, "loss": 1.2133, "step": 3571 }, { "epoch": 0.9853793103448276, "grad_norm": 3.7821898460388184, "learning_rate": 9.42044501397799e-06, "loss": 1.2564, "step": 3572 }, { "epoch": 0.9856551724137931, "grad_norm": 3.688173770904541, "learning_rate": 9.42001860616976e-06, "loss": 1.1691, "step": 3573 }, { "epoch": 0.9859310344827587, "grad_norm": 3.370786428451538, "learning_rate": 9.41959205121215e-06, "loss": 1.1609, "step": 3574 }, { "epoch": 0.9862068965517241, "grad_norm": 3.5798721313476562, "learning_rate": 9.41916534911936e-06, "loss": 1.1973, "step": 3575 }, { "epoch": 0.9864827586206897, "grad_norm": 3.0792922973632812, "learning_rate": 9.418738499905597e-06, "loss": 1.2053, "step": 3576 }, { "epoch": 0.9867586206896551, "grad_norm": 3.4083609580993652, "learning_rate": 9.418311503585071e-06, "loss": 1.2597, "step": 3577 }, { "epoch": 0.9870344827586207, "grad_norm": 3.441406488418579, "learning_rate": 9.417884360171999e-06, "loss": 1.2454, "step": 3578 }, { "epoch": 0.9873103448275862, "grad_norm": 3.2958903312683105, "learning_rate": 9.4174570696806e-06, "loss": 1.175, "step": 3579 }, { "epoch": 0.9875862068965517, "grad_norm": 3.764596939086914, "learning_rate": 9.417029632125097e-06, "loss": 1.4044, "step": 3580 }, { "epoch": 0.9878620689655172, "grad_norm": 3.009023666381836, "learning_rate": 9.416602047519725e-06, "loss": 1.2201, "step": 3581 }, { "epoch": 0.9881379310344828, "grad_norm": 2.955813407897949, "learning_rate": 9.416174315878715e-06, "loss": 1.0795, "step": 3582 }, { "epoch": 0.9884137931034482, "grad_norm": 3.5847153663635254, "learning_rate": 9.41574643721631e-06, "loss": 1.1466, "step": 3583 }, { "epoch": 0.9886896551724138, "grad_norm": 3.0730767250061035, "learning_rate": 9.41531841154675e-06, "loss": 1.1173, "step": 3584 }, { "epoch": 0.9889655172413793, "grad_norm": 3.6248440742492676, "learning_rate": 9.414890238884289e-06, "loss": 1.421, "step": 3585 }, { "epoch": 0.9892413793103448, "grad_norm": 3.7604246139526367, "learning_rate": 9.414461919243178e-06, "loss": 1.279, "step": 3586 }, { "epoch": 0.9895172413793103, "grad_norm": 3.6368305683135986, "learning_rate": 9.41403345263768e-06, "loss": 1.3036, "step": 3587 }, { "epoch": 0.9897931034482759, "grad_norm": 3.2431087493896484, "learning_rate": 9.41360483908206e-06, "loss": 1.2107, "step": 3588 }, { "epoch": 0.9900689655172413, "grad_norm": 3.2817180156707764, "learning_rate": 9.413176078590582e-06, "loss": 1.0537, "step": 3589 }, { "epoch": 0.9903448275862069, "grad_norm": 3.374157667160034, "learning_rate": 9.412747171177526e-06, "loss": 1.2338, "step": 3590 }, { "epoch": 0.9906206896551724, "grad_norm": 3.560572385787964, "learning_rate": 9.412318116857164e-06, "loss": 1.2948, "step": 3591 }, { "epoch": 0.9908965517241379, "grad_norm": 3.784919023513794, "learning_rate": 9.411888915643788e-06, "loss": 1.3426, "step": 3592 }, { "epoch": 0.9911724137931035, "grad_norm": 3.394169807434082, "learning_rate": 9.411459567551681e-06, "loss": 1.2019, "step": 3593 }, { "epoch": 0.991448275862069, "grad_norm": 3.5333683490753174, "learning_rate": 9.411030072595137e-06, "loss": 1.1754, "step": 3594 }, { "epoch": 0.9917241379310345, "grad_norm": 3.6388003826141357, "learning_rate": 9.41060043078846e-06, "loss": 1.1063, "step": 3595 }, { "epoch": 0.992, "grad_norm": 3.4492435455322266, "learning_rate": 9.410170642145946e-06, "loss": 1.3148, "step": 3596 }, { "epoch": 0.9922758620689656, "grad_norm": 3.8166568279266357, "learning_rate": 9.409740706681909e-06, "loss": 1.2884, "step": 3597 }, { "epoch": 0.992551724137931, "grad_norm": 3.8989439010620117, "learning_rate": 9.40931062441066e-06, "loss": 1.2322, "step": 3598 }, { "epoch": 0.9928275862068966, "grad_norm": 3.6001105308532715, "learning_rate": 9.408880395346515e-06, "loss": 1.1413, "step": 3599 }, { "epoch": 0.993103448275862, "grad_norm": 3.3498637676239014, "learning_rate": 9.4084500195038e-06, "loss": 1.2399, "step": 3600 }, { "epoch": 0.9933793103448276, "grad_norm": 3.569905996322632, "learning_rate": 9.408019496896843e-06, "loss": 1.1846, "step": 3601 }, { "epoch": 0.9936551724137931, "grad_norm": 3.4120631217956543, "learning_rate": 9.407588827539975e-06, "loss": 1.1238, "step": 3602 }, { "epoch": 0.9939310344827587, "grad_norm": 4.1030964851379395, "learning_rate": 9.407158011447536e-06, "loss": 1.3378, "step": 3603 }, { "epoch": 0.9942068965517241, "grad_norm": 3.3100059032440186, "learning_rate": 9.406727048633865e-06, "loss": 1.0831, "step": 3604 }, { "epoch": 0.9944827586206897, "grad_norm": 3.6109800338745117, "learning_rate": 9.406295939113314e-06, "loss": 1.2982, "step": 3605 }, { "epoch": 0.9947586206896551, "grad_norm": 3.555248260498047, "learning_rate": 9.405864682900231e-06, "loss": 1.2223, "step": 3606 }, { "epoch": 0.9950344827586207, "grad_norm": 3.9348831176757812, "learning_rate": 9.405433280008975e-06, "loss": 1.3455, "step": 3607 }, { "epoch": 0.9953103448275862, "grad_norm": 3.412658214569092, "learning_rate": 9.405001730453909e-06, "loss": 1.2264, "step": 3608 }, { "epoch": 0.9955862068965518, "grad_norm": 3.542269706726074, "learning_rate": 9.404570034249399e-06, "loss": 1.17, "step": 3609 }, { "epoch": 0.9958620689655172, "grad_norm": 3.7368040084838867, "learning_rate": 9.404138191409816e-06, "loss": 1.3228, "step": 3610 }, { "epoch": 0.9961379310344828, "grad_norm": 3.716280698776245, "learning_rate": 9.403706201949539e-06, "loss": 1.3733, "step": 3611 }, { "epoch": 0.9964137931034482, "grad_norm": 3.193559408187866, "learning_rate": 9.403274065882947e-06, "loss": 1.1422, "step": 3612 }, { "epoch": 0.9966896551724138, "grad_norm": 3.1479830741882324, "learning_rate": 9.402841783224429e-06, "loss": 1.1144, "step": 3613 }, { "epoch": 0.9969655172413793, "grad_norm": 3.6234724521636963, "learning_rate": 9.402409353988373e-06, "loss": 1.3626, "step": 3614 }, { "epoch": 0.9972413793103448, "grad_norm": 3.2117679119110107, "learning_rate": 9.401976778189179e-06, "loss": 1.0879, "step": 3615 }, { "epoch": 0.9975172413793103, "grad_norm": 3.2853341102600098, "learning_rate": 9.401544055841245e-06, "loss": 1.2009, "step": 3616 }, { "epoch": 0.9977931034482759, "grad_norm": 3.44478702545166, "learning_rate": 9.40111118695898e-06, "loss": 1.1529, "step": 3617 }, { "epoch": 0.9980689655172413, "grad_norm": 3.599716901779175, "learning_rate": 9.400678171556794e-06, "loss": 1.3237, "step": 3618 }, { "epoch": 0.9983448275862069, "grad_norm": 3.6539337635040283, "learning_rate": 9.400245009649101e-06, "loss": 1.2289, "step": 3619 }, { "epoch": 0.9986206896551724, "grad_norm": 3.6096861362457275, "learning_rate": 9.399811701250323e-06, "loss": 1.2125, "step": 3620 }, { "epoch": 0.9988965517241379, "grad_norm": 3.7063324451446533, "learning_rate": 9.399378246374884e-06, "loss": 1.4472, "step": 3621 }, { "epoch": 0.9991724137931034, "grad_norm": 3.566204309463501, "learning_rate": 9.398944645037219e-06, "loss": 1.2891, "step": 3622 }, { "epoch": 0.999448275862069, "grad_norm": 3.5210351943969727, "learning_rate": 9.398510897251756e-06, "loss": 1.307, "step": 3623 }, { "epoch": 0.9997241379310344, "grad_norm": 3.2832767963409424, "learning_rate": 9.398077003032942e-06, "loss": 1.1397, "step": 3624 }, { "epoch": 1.0, "grad_norm": 3.498565435409546, "learning_rate": 9.397642962395217e-06, "loss": 1.2267, "step": 3625 }, { "epoch": 1.0002758620689656, "grad_norm": 3.349182605743408, "learning_rate": 9.397208775353035e-06, "loss": 0.957, "step": 3626 }, { "epoch": 1.0005517241379311, "grad_norm": 3.4610066413879395, "learning_rate": 9.396774441920848e-06, "loss": 0.8625, "step": 3627 }, { "epoch": 1.0008275862068965, "grad_norm": 3.1865272521972656, "learning_rate": 9.396339962113117e-06, "loss": 0.8733, "step": 3628 }, { "epoch": 1.001103448275862, "grad_norm": 3.492502450942993, "learning_rate": 9.395905335944305e-06, "loss": 0.9252, "step": 3629 }, { "epoch": 1.0013793103448276, "grad_norm": 3.351041555404663, "learning_rate": 9.395470563428884e-06, "loss": 0.9181, "step": 3630 }, { "epoch": 1.0016551724137932, "grad_norm": 3.1633124351501465, "learning_rate": 9.395035644581324e-06, "loss": 0.813, "step": 3631 }, { "epoch": 1.0019310344827586, "grad_norm": 3.352509021759033, "learning_rate": 9.39460057941611e-06, "loss": 0.8119, "step": 3632 }, { "epoch": 1.0022068965517241, "grad_norm": 3.371508836746216, "learning_rate": 9.394165367947719e-06, "loss": 0.8952, "step": 3633 }, { "epoch": 1.0024827586206897, "grad_norm": 4.327502250671387, "learning_rate": 9.393730010190645e-06, "loss": 0.9763, "step": 3634 }, { "epoch": 1.0027586206896553, "grad_norm": 3.7803025245666504, "learning_rate": 9.39329450615938e-06, "loss": 0.6839, "step": 3635 }, { "epoch": 1.0030344827586206, "grad_norm": 3.603543519973755, "learning_rate": 9.392858855868425e-06, "loss": 0.7336, "step": 3636 }, { "epoch": 1.0033103448275862, "grad_norm": 4.039660453796387, "learning_rate": 9.39242305933228e-06, "loss": 0.7738, "step": 3637 }, { "epoch": 1.0035862068965518, "grad_norm": 4.237802982330322, "learning_rate": 9.391987116565456e-06, "loss": 0.7309, "step": 3638 }, { "epoch": 1.0038620689655173, "grad_norm": 3.924041986465454, "learning_rate": 9.391551027582463e-06, "loss": 0.832, "step": 3639 }, { "epoch": 1.0041379310344827, "grad_norm": 3.36446213722229, "learning_rate": 9.391114792397823e-06, "loss": 0.7292, "step": 3640 }, { "epoch": 1.0044137931034482, "grad_norm": 4.13887357711792, "learning_rate": 9.390678411026058e-06, "loss": 0.7188, "step": 3641 }, { "epoch": 1.0046896551724138, "grad_norm": 3.95076060295105, "learning_rate": 9.390241883481692e-06, "loss": 0.9192, "step": 3642 }, { "epoch": 1.0049655172413794, "grad_norm": 3.6235272884368896, "learning_rate": 9.389805209779263e-06, "loss": 0.7457, "step": 3643 }, { "epoch": 1.0052413793103447, "grad_norm": 3.6885716915130615, "learning_rate": 9.389368389933304e-06, "loss": 0.8082, "step": 3644 }, { "epoch": 1.0055172413793103, "grad_norm": 3.4986307621002197, "learning_rate": 9.388931423958361e-06, "loss": 0.7472, "step": 3645 }, { "epoch": 1.0057931034482759, "grad_norm": 3.8342955112457275, "learning_rate": 9.38849431186898e-06, "loss": 0.7944, "step": 3646 }, { "epoch": 1.0060689655172415, "grad_norm": 4.029666423797607, "learning_rate": 9.388057053679713e-06, "loss": 0.8618, "step": 3647 }, { "epoch": 1.0063448275862068, "grad_norm": 4.273388385772705, "learning_rate": 9.387619649405116e-06, "loss": 0.8336, "step": 3648 }, { "epoch": 1.0066206896551724, "grad_norm": 3.68745756149292, "learning_rate": 9.387182099059752e-06, "loss": 0.8172, "step": 3649 }, { "epoch": 1.006896551724138, "grad_norm": 4.076519012451172, "learning_rate": 9.38674440265819e-06, "loss": 0.902, "step": 3650 }, { "epoch": 1.0071724137931035, "grad_norm": 4.1129536628723145, "learning_rate": 9.386306560214998e-06, "loss": 0.7751, "step": 3651 }, { "epoch": 1.0074482758620689, "grad_norm": 3.8320841789245605, "learning_rate": 9.385868571744751e-06, "loss": 0.8882, "step": 3652 }, { "epoch": 1.0077241379310344, "grad_norm": 3.6309633255004883, "learning_rate": 9.385430437262035e-06, "loss": 0.6935, "step": 3653 }, { "epoch": 1.008, "grad_norm": 3.6539766788482666, "learning_rate": 9.384992156781437e-06, "loss": 0.6514, "step": 3654 }, { "epoch": 1.0082758620689656, "grad_norm": 3.6492252349853516, "learning_rate": 9.384553730317541e-06, "loss": 0.8165, "step": 3655 }, { "epoch": 1.008551724137931, "grad_norm": 3.966176986694336, "learning_rate": 9.38411515788495e-06, "loss": 0.7981, "step": 3656 }, { "epoch": 1.0088275862068965, "grad_norm": 3.9938132762908936, "learning_rate": 9.38367643949826e-06, "loss": 0.7973, "step": 3657 }, { "epoch": 1.009103448275862, "grad_norm": 4.503997802734375, "learning_rate": 9.38323757517208e-06, "loss": 0.9939, "step": 3658 }, { "epoch": 1.0093793103448276, "grad_norm": 3.981100082397461, "learning_rate": 9.382798564921018e-06, "loss": 0.8664, "step": 3659 }, { "epoch": 1.0096551724137932, "grad_norm": 3.3569324016571045, "learning_rate": 9.38235940875969e-06, "loss": 0.7543, "step": 3660 }, { "epoch": 1.0099310344827586, "grad_norm": 3.7362446784973145, "learning_rate": 9.38192010670272e-06, "loss": 0.8451, "step": 3661 }, { "epoch": 1.0102068965517241, "grad_norm": 3.6999363899230957, "learning_rate": 9.381480658764725e-06, "loss": 0.8248, "step": 3662 }, { "epoch": 1.0104827586206897, "grad_norm": 3.475116491317749, "learning_rate": 9.381041064960342e-06, "loss": 0.7124, "step": 3663 }, { "epoch": 1.0107586206896553, "grad_norm": 3.3667750358581543, "learning_rate": 9.380601325304203e-06, "loss": 0.6991, "step": 3664 }, { "epoch": 1.0110344827586206, "grad_norm": 4.372078895568848, "learning_rate": 9.380161439810948e-06, "loss": 0.9218, "step": 3665 }, { "epoch": 1.0113103448275862, "grad_norm": 4.178094863891602, "learning_rate": 9.379721408495222e-06, "loss": 0.8258, "step": 3666 }, { "epoch": 1.0115862068965518, "grad_norm": 3.8437492847442627, "learning_rate": 9.379281231371672e-06, "loss": 0.8247, "step": 3667 }, { "epoch": 1.0118620689655173, "grad_norm": 4.005189895629883, "learning_rate": 9.378840908454957e-06, "loss": 0.7938, "step": 3668 }, { "epoch": 1.0121379310344827, "grad_norm": 3.805187940597534, "learning_rate": 9.378400439759731e-06, "loss": 0.7996, "step": 3669 }, { "epoch": 1.0124137931034483, "grad_norm": 3.806004047393799, "learning_rate": 9.37795982530066e-06, "loss": 0.7721, "step": 3670 }, { "epoch": 1.0126896551724138, "grad_norm": 3.4969675540924072, "learning_rate": 9.377519065092413e-06, "loss": 0.6869, "step": 3671 }, { "epoch": 1.0129655172413794, "grad_norm": 3.9590208530426025, "learning_rate": 9.377078159149664e-06, "loss": 0.7875, "step": 3672 }, { "epoch": 1.0132413793103447, "grad_norm": 3.916682243347168, "learning_rate": 9.37663710748709e-06, "loss": 0.8089, "step": 3673 }, { "epoch": 1.0135172413793103, "grad_norm": 3.8652639389038086, "learning_rate": 9.376195910119375e-06, "loss": 0.8979, "step": 3674 }, { "epoch": 1.013793103448276, "grad_norm": 4.320820331573486, "learning_rate": 9.375754567061206e-06, "loss": 0.8711, "step": 3675 }, { "epoch": 1.0140689655172415, "grad_norm": 3.8688411712646484, "learning_rate": 9.37531307832728e-06, "loss": 0.8586, "step": 3676 }, { "epoch": 1.0143448275862068, "grad_norm": 3.767266035079956, "learning_rate": 9.37487144393229e-06, "loss": 0.8417, "step": 3677 }, { "epoch": 1.0146206896551724, "grad_norm": 3.986837863922119, "learning_rate": 9.37442966389094e-06, "loss": 0.8847, "step": 3678 }, { "epoch": 1.014896551724138, "grad_norm": 4.206093788146973, "learning_rate": 9.37398773821794e-06, "loss": 0.7673, "step": 3679 }, { "epoch": 1.0151724137931035, "grad_norm": 3.9358487129211426, "learning_rate": 9.373545666927999e-06, "loss": 0.9201, "step": 3680 }, { "epoch": 1.0154482758620689, "grad_norm": 3.786081314086914, "learning_rate": 9.373103450035837e-06, "loss": 0.8524, "step": 3681 }, { "epoch": 1.0157241379310344, "grad_norm": 4.10860538482666, "learning_rate": 9.372661087556177e-06, "loss": 0.9192, "step": 3682 }, { "epoch": 1.016, "grad_norm": 3.994166135787964, "learning_rate": 9.37221857950374e-06, "loss": 0.7057, "step": 3683 }, { "epoch": 1.0162758620689656, "grad_norm": 3.511441230773926, "learning_rate": 9.371775925893265e-06, "loss": 0.7251, "step": 3684 }, { "epoch": 1.016551724137931, "grad_norm": 3.81571102142334, "learning_rate": 9.371333126739483e-06, "loss": 0.8844, "step": 3685 }, { "epoch": 1.0168275862068965, "grad_norm": 4.579653739929199, "learning_rate": 9.37089018205714e-06, "loss": 0.8353, "step": 3686 }, { "epoch": 1.017103448275862, "grad_norm": 3.879481315612793, "learning_rate": 9.370447091860978e-06, "loss": 0.823, "step": 3687 }, { "epoch": 1.0173793103448276, "grad_norm": 3.6071345806121826, "learning_rate": 9.370003856165753e-06, "loss": 0.7518, "step": 3688 }, { "epoch": 1.017655172413793, "grad_norm": 3.9530060291290283, "learning_rate": 9.369560474986217e-06, "loss": 0.7195, "step": 3689 }, { "epoch": 1.0179310344827586, "grad_norm": 3.6994872093200684, "learning_rate": 9.36911694833713e-06, "loss": 0.7937, "step": 3690 }, { "epoch": 1.0182068965517241, "grad_norm": 4.039832592010498, "learning_rate": 9.368673276233265e-06, "loss": 0.7401, "step": 3691 }, { "epoch": 1.0184827586206897, "grad_norm": 4.312869548797607, "learning_rate": 9.368229458689383e-06, "loss": 0.7517, "step": 3692 }, { "epoch": 1.0187586206896553, "grad_norm": 4.435859680175781, "learning_rate": 9.367785495720267e-06, "loss": 0.874, "step": 3693 }, { "epoch": 1.0190344827586206, "grad_norm": 3.6509644985198975, "learning_rate": 9.367341387340692e-06, "loss": 0.9246, "step": 3694 }, { "epoch": 1.0193103448275862, "grad_norm": 3.589010000228882, "learning_rate": 9.366897133565448e-06, "loss": 0.8548, "step": 3695 }, { "epoch": 1.0195862068965518, "grad_norm": 3.5735995769500732, "learning_rate": 9.36645273440932e-06, "loss": 0.7193, "step": 3696 }, { "epoch": 1.0198620689655173, "grad_norm": 3.494185209274292, "learning_rate": 9.366008189887104e-06, "loss": 0.6461, "step": 3697 }, { "epoch": 1.0201379310344827, "grad_norm": 3.712035655975342, "learning_rate": 9.3655635000136e-06, "loss": 0.8712, "step": 3698 }, { "epoch": 1.0204137931034483, "grad_norm": 3.5109174251556396, "learning_rate": 9.365118664803616e-06, "loss": 0.658, "step": 3699 }, { "epoch": 1.0206896551724138, "grad_norm": 3.754218339920044, "learning_rate": 9.364673684271956e-06, "loss": 0.7756, "step": 3700 }, { "epoch": 1.0209655172413794, "grad_norm": 3.8628830909729004, "learning_rate": 9.364228558433437e-06, "loss": 0.7895, "step": 3701 }, { "epoch": 1.0212413793103448, "grad_norm": 4.238793849945068, "learning_rate": 9.363783287302876e-06, "loss": 0.827, "step": 3702 }, { "epoch": 1.0215172413793103, "grad_norm": 4.122849464416504, "learning_rate": 9.363337870895099e-06, "loss": 0.9006, "step": 3703 }, { "epoch": 1.021793103448276, "grad_norm": 3.669294595718384, "learning_rate": 9.362892309224934e-06, "loss": 0.7561, "step": 3704 }, { "epoch": 1.0220689655172415, "grad_norm": 4.263172149658203, "learning_rate": 9.362446602307214e-06, "loss": 0.8232, "step": 3705 }, { "epoch": 1.0223448275862068, "grad_norm": 4.10144567489624, "learning_rate": 9.362000750156775e-06, "loss": 0.7666, "step": 3706 }, { "epoch": 1.0226206896551724, "grad_norm": 4.547667026519775, "learning_rate": 9.361554752788464e-06, "loss": 0.8312, "step": 3707 }, { "epoch": 1.022896551724138, "grad_norm": 4.1330718994140625, "learning_rate": 9.361108610217127e-06, "loss": 0.8045, "step": 3708 }, { "epoch": 1.0231724137931035, "grad_norm": 3.791469097137451, "learning_rate": 9.36066232245762e-06, "loss": 0.8645, "step": 3709 }, { "epoch": 1.0234482758620689, "grad_norm": 4.059629917144775, "learning_rate": 9.360215889524794e-06, "loss": 0.8731, "step": 3710 }, { "epoch": 1.0237241379310345, "grad_norm": 3.390213966369629, "learning_rate": 9.359769311433516e-06, "loss": 0.7445, "step": 3711 }, { "epoch": 1.024, "grad_norm": 3.4078431129455566, "learning_rate": 9.359322588198653e-06, "loss": 0.6751, "step": 3712 }, { "epoch": 1.0242758620689656, "grad_norm": 3.7598061561584473, "learning_rate": 9.358875719835076e-06, "loss": 0.8765, "step": 3713 }, { "epoch": 1.024551724137931, "grad_norm": 4.369820594787598, "learning_rate": 9.358428706357665e-06, "loss": 0.7657, "step": 3714 }, { "epoch": 1.0248275862068965, "grad_norm": 3.6662309169769287, "learning_rate": 9.357981547781297e-06, "loss": 0.6939, "step": 3715 }, { "epoch": 1.025103448275862, "grad_norm": 3.6433825492858887, "learning_rate": 9.357534244120861e-06, "loss": 0.7169, "step": 3716 }, { "epoch": 1.0253793103448277, "grad_norm": 3.853898286819458, "learning_rate": 9.357086795391248e-06, "loss": 0.739, "step": 3717 }, { "epoch": 1.025655172413793, "grad_norm": 3.980196237564087, "learning_rate": 9.356639201607356e-06, "loss": 0.855, "step": 3718 }, { "epoch": 1.0259310344827586, "grad_norm": 4.476511001586914, "learning_rate": 9.356191462784085e-06, "loss": 0.8456, "step": 3719 }, { "epoch": 1.0262068965517241, "grad_norm": 3.4558892250061035, "learning_rate": 9.35574357893634e-06, "loss": 0.7715, "step": 3720 }, { "epoch": 1.0264827586206897, "grad_norm": 4.346835136413574, "learning_rate": 9.355295550079033e-06, "loss": 0.8725, "step": 3721 }, { "epoch": 1.026758620689655, "grad_norm": 3.6029107570648193, "learning_rate": 9.354847376227079e-06, "loss": 0.7298, "step": 3722 }, { "epoch": 1.0270344827586206, "grad_norm": 3.861743450164795, "learning_rate": 9.354399057395398e-06, "loss": 0.8086, "step": 3723 }, { "epoch": 1.0273103448275862, "grad_norm": 3.896411418914795, "learning_rate": 9.353950593598916e-06, "loss": 0.7298, "step": 3724 }, { "epoch": 1.0275862068965518, "grad_norm": 3.8837039470672607, "learning_rate": 9.353501984852562e-06, "loss": 0.8123, "step": 3725 }, { "epoch": 1.0278620689655174, "grad_norm": 3.8388328552246094, "learning_rate": 9.353053231171275e-06, "loss": 0.8208, "step": 3726 }, { "epoch": 1.0281379310344827, "grad_norm": 4.0136799812316895, "learning_rate": 9.352604332569987e-06, "loss": 0.7657, "step": 3727 }, { "epoch": 1.0284137931034483, "grad_norm": 3.7708959579467773, "learning_rate": 9.35215528906365e-06, "loss": 0.6533, "step": 3728 }, { "epoch": 1.0286896551724138, "grad_norm": 4.467050075531006, "learning_rate": 9.351706100667208e-06, "loss": 0.8414, "step": 3729 }, { "epoch": 1.0289655172413794, "grad_norm": 3.93597412109375, "learning_rate": 9.351256767395619e-06, "loss": 0.7994, "step": 3730 }, { "epoch": 1.0292413793103448, "grad_norm": 3.80808424949646, "learning_rate": 9.35080728926384e-06, "loss": 0.7715, "step": 3731 }, { "epoch": 1.0295172413793103, "grad_norm": 3.705090284347534, "learning_rate": 9.350357666286835e-06, "loss": 0.8593, "step": 3732 }, { "epoch": 1.029793103448276, "grad_norm": 3.6352336406707764, "learning_rate": 9.349907898479573e-06, "loss": 0.7027, "step": 3733 }, { "epoch": 1.0300689655172415, "grad_norm": 3.5925347805023193, "learning_rate": 9.34945798585703e-06, "loss": 0.692, "step": 3734 }, { "epoch": 1.0303448275862068, "grad_norm": 4.068915843963623, "learning_rate": 9.349007928434178e-06, "loss": 0.842, "step": 3735 }, { "epoch": 1.0306206896551724, "grad_norm": 3.5372695922851562, "learning_rate": 9.348557726226006e-06, "loss": 0.8107, "step": 3736 }, { "epoch": 1.030896551724138, "grad_norm": 3.822619915008545, "learning_rate": 9.3481073792475e-06, "loss": 0.885, "step": 3737 }, { "epoch": 1.0311724137931035, "grad_norm": 4.118279457092285, "learning_rate": 9.347656887513651e-06, "loss": 0.8057, "step": 3738 }, { "epoch": 1.0314482758620689, "grad_norm": 3.7590034008026123, "learning_rate": 9.34720625103946e-06, "loss": 0.7694, "step": 3739 }, { "epoch": 1.0317241379310345, "grad_norm": 3.6571359634399414, "learning_rate": 9.346755469839927e-06, "loss": 0.6837, "step": 3740 }, { "epoch": 1.032, "grad_norm": 4.090829372406006, "learning_rate": 9.346304543930059e-06, "loss": 0.7954, "step": 3741 }, { "epoch": 1.0322758620689656, "grad_norm": 4.103240489959717, "learning_rate": 9.34585347332487e-06, "loss": 0.9241, "step": 3742 }, { "epoch": 1.032551724137931, "grad_norm": 4.190467834472656, "learning_rate": 9.345402258039376e-06, "loss": 0.9232, "step": 3743 }, { "epoch": 1.0328275862068965, "grad_norm": 3.9080355167388916, "learning_rate": 9.344950898088596e-06, "loss": 0.76, "step": 3744 }, { "epoch": 1.033103448275862, "grad_norm": 3.8073408603668213, "learning_rate": 9.34449939348756e-06, "loss": 0.7401, "step": 3745 }, { "epoch": 1.0333793103448277, "grad_norm": 3.6350224018096924, "learning_rate": 9.344047744251299e-06, "loss": 0.8233, "step": 3746 }, { "epoch": 1.033655172413793, "grad_norm": 4.044993877410889, "learning_rate": 9.343595950394848e-06, "loss": 0.7129, "step": 3747 }, { "epoch": 1.0339310344827586, "grad_norm": 4.110714435577393, "learning_rate": 9.343144011933247e-06, "loss": 0.8159, "step": 3748 }, { "epoch": 1.0342068965517242, "grad_norm": 3.6282436847686768, "learning_rate": 9.342691928881544e-06, "loss": 0.7838, "step": 3749 }, { "epoch": 1.0344827586206897, "grad_norm": 3.944167375564575, "learning_rate": 9.34223970125479e-06, "loss": 0.7697, "step": 3750 }, { "epoch": 1.034758620689655, "grad_norm": 4.3585710525512695, "learning_rate": 9.341787329068036e-06, "loss": 0.8773, "step": 3751 }, { "epoch": 1.0350344827586206, "grad_norm": 4.106535911560059, "learning_rate": 9.341334812336346e-06, "loss": 0.7144, "step": 3752 }, { "epoch": 1.0353103448275862, "grad_norm": 4.039569854736328, "learning_rate": 9.340882151074784e-06, "loss": 0.8816, "step": 3753 }, { "epoch": 1.0355862068965518, "grad_norm": 3.7829697132110596, "learning_rate": 9.340429345298421e-06, "loss": 0.7885, "step": 3754 }, { "epoch": 1.0358620689655171, "grad_norm": 4.134342670440674, "learning_rate": 9.339976395022327e-06, "loss": 0.7865, "step": 3755 }, { "epoch": 1.0361379310344827, "grad_norm": 4.590356349945068, "learning_rate": 9.339523300261588e-06, "loss": 1.0073, "step": 3756 }, { "epoch": 1.0364137931034483, "grad_norm": 3.623292922973633, "learning_rate": 9.339070061031285e-06, "loss": 0.7428, "step": 3757 }, { "epoch": 1.0366896551724138, "grad_norm": 3.6703553199768066, "learning_rate": 9.338616677346508e-06, "loss": 0.7382, "step": 3758 }, { "epoch": 1.0369655172413794, "grad_norm": 3.7826743125915527, "learning_rate": 9.338163149222349e-06, "loss": 0.8094, "step": 3759 }, { "epoch": 1.0372413793103448, "grad_norm": 4.087406158447266, "learning_rate": 9.337709476673907e-06, "loss": 0.87, "step": 3760 }, { "epoch": 1.0375172413793103, "grad_norm": 3.9231226444244385, "learning_rate": 9.337255659716285e-06, "loss": 0.7619, "step": 3761 }, { "epoch": 1.037793103448276, "grad_norm": 4.020940780639648, "learning_rate": 9.336801698364594e-06, "loss": 0.7903, "step": 3762 }, { "epoch": 1.0380689655172415, "grad_norm": 4.155969142913818, "learning_rate": 9.336347592633945e-06, "loss": 0.8436, "step": 3763 }, { "epoch": 1.0383448275862068, "grad_norm": 3.6396422386169434, "learning_rate": 9.335893342539457e-06, "loss": 0.6851, "step": 3764 }, { "epoch": 1.0386206896551724, "grad_norm": 3.503920793533325, "learning_rate": 9.33543894809625e-06, "loss": 0.6828, "step": 3765 }, { "epoch": 1.038896551724138, "grad_norm": 4.7896575927734375, "learning_rate": 9.334984409319457e-06, "loss": 0.8555, "step": 3766 }, { "epoch": 1.0391724137931035, "grad_norm": 4.238002777099609, "learning_rate": 9.334529726224207e-06, "loss": 0.7152, "step": 3767 }, { "epoch": 1.039448275862069, "grad_norm": 4.399844169616699, "learning_rate": 9.334074898825634e-06, "loss": 0.7309, "step": 3768 }, { "epoch": 1.0397241379310345, "grad_norm": 4.037847518920898, "learning_rate": 9.333619927138884e-06, "loss": 0.8449, "step": 3769 }, { "epoch": 1.04, "grad_norm": 4.399582862854004, "learning_rate": 9.333164811179104e-06, "loss": 0.9383, "step": 3770 }, { "epoch": 1.0402758620689656, "grad_norm": 3.8599696159362793, "learning_rate": 9.332709550961443e-06, "loss": 0.7839, "step": 3771 }, { "epoch": 1.040551724137931, "grad_norm": 3.4529709815979004, "learning_rate": 9.332254146501058e-06, "loss": 0.7045, "step": 3772 }, { "epoch": 1.0408275862068965, "grad_norm": 3.1541101932525635, "learning_rate": 9.331798597813112e-06, "loss": 0.7874, "step": 3773 }, { "epoch": 1.041103448275862, "grad_norm": 3.8154537677764893, "learning_rate": 9.331342904912768e-06, "loss": 0.8114, "step": 3774 }, { "epoch": 1.0413793103448277, "grad_norm": 4.204812526702881, "learning_rate": 9.330887067815201e-06, "loss": 0.7486, "step": 3775 }, { "epoch": 1.041655172413793, "grad_norm": 4.397768974304199, "learning_rate": 9.330431086535582e-06, "loss": 0.925, "step": 3776 }, { "epoch": 1.0419310344827586, "grad_norm": 3.772263765335083, "learning_rate": 9.329974961089094e-06, "loss": 0.7615, "step": 3777 }, { "epoch": 1.0422068965517242, "grad_norm": 3.9604198932647705, "learning_rate": 9.32951869149092e-06, "loss": 0.8183, "step": 3778 }, { "epoch": 1.0424827586206897, "grad_norm": 3.8094725608825684, "learning_rate": 9.329062277756253e-06, "loss": 0.7466, "step": 3779 }, { "epoch": 1.042758620689655, "grad_norm": 5.335730075836182, "learning_rate": 9.328605719900286e-06, "loss": 0.743, "step": 3780 }, { "epoch": 1.0430344827586207, "grad_norm": 3.75877046585083, "learning_rate": 9.328149017938217e-06, "loss": 0.8403, "step": 3781 }, { "epoch": 1.0433103448275862, "grad_norm": 3.7490620613098145, "learning_rate": 9.327692171885254e-06, "loss": 0.8723, "step": 3782 }, { "epoch": 1.0435862068965518, "grad_norm": 3.9411561489105225, "learning_rate": 9.327235181756603e-06, "loss": 0.8179, "step": 3783 }, { "epoch": 1.0438620689655171, "grad_norm": 3.6437854766845703, "learning_rate": 9.326778047567478e-06, "loss": 0.6837, "step": 3784 }, { "epoch": 1.0441379310344827, "grad_norm": 3.548070192337036, "learning_rate": 9.3263207693331e-06, "loss": 0.6697, "step": 3785 }, { "epoch": 1.0444137931034483, "grad_norm": 3.9548771381378174, "learning_rate": 9.32586334706869e-06, "loss": 0.8004, "step": 3786 }, { "epoch": 1.0446896551724139, "grad_norm": 3.882301092147827, "learning_rate": 9.325405780789478e-06, "loss": 0.9454, "step": 3787 }, { "epoch": 1.0449655172413792, "grad_norm": 3.568592071533203, "learning_rate": 9.324948070510696e-06, "loss": 0.7432, "step": 3788 }, { "epoch": 1.0452413793103448, "grad_norm": 3.8760483264923096, "learning_rate": 9.324490216247582e-06, "loss": 0.78, "step": 3789 }, { "epoch": 1.0455172413793103, "grad_norm": 3.4467127323150635, "learning_rate": 9.324032218015381e-06, "loss": 0.6765, "step": 3790 }, { "epoch": 1.045793103448276, "grad_norm": 4.684268951416016, "learning_rate": 9.323574075829338e-06, "loss": 0.9984, "step": 3791 }, { "epoch": 1.0460689655172413, "grad_norm": 4.154022693634033, "learning_rate": 9.323115789704704e-06, "loss": 0.7047, "step": 3792 }, { "epoch": 1.0463448275862068, "grad_norm": 3.9396655559539795, "learning_rate": 9.32265735965674e-06, "loss": 0.8412, "step": 3793 }, { "epoch": 1.0466206896551724, "grad_norm": 4.24268913269043, "learning_rate": 9.322198785700708e-06, "loss": 0.8581, "step": 3794 }, { "epoch": 1.046896551724138, "grad_norm": 4.3345232009887695, "learning_rate": 9.321740067851868e-06, "loss": 0.8744, "step": 3795 }, { "epoch": 1.0471724137931036, "grad_norm": 4.3393235206604, "learning_rate": 9.321281206125498e-06, "loss": 0.9568, "step": 3796 }, { "epoch": 1.047448275862069, "grad_norm": 3.6740009784698486, "learning_rate": 9.320822200536872e-06, "loss": 0.7433, "step": 3797 }, { "epoch": 1.0477241379310345, "grad_norm": 4.180270195007324, "learning_rate": 9.320363051101271e-06, "loss": 0.7281, "step": 3798 }, { "epoch": 1.048, "grad_norm": 3.7729499340057373, "learning_rate": 9.319903757833981e-06, "loss": 0.6939, "step": 3799 }, { "epoch": 1.0482758620689656, "grad_norm": 3.7520313262939453, "learning_rate": 9.319444320750292e-06, "loss": 0.7191, "step": 3800 }, { "epoch": 1.048551724137931, "grad_norm": 3.9558606147766113, "learning_rate": 9.318984739865502e-06, "loss": 0.8981, "step": 3801 }, { "epoch": 1.0488275862068965, "grad_norm": 4.1687846183776855, "learning_rate": 9.318525015194906e-06, "loss": 0.693, "step": 3802 }, { "epoch": 1.049103448275862, "grad_norm": 4.050071716308594, "learning_rate": 9.318065146753814e-06, "loss": 0.7827, "step": 3803 }, { "epoch": 1.0493793103448277, "grad_norm": 3.973759412765503, "learning_rate": 9.317605134557535e-06, "loss": 0.7062, "step": 3804 }, { "epoch": 1.049655172413793, "grad_norm": 3.2305335998535156, "learning_rate": 9.31714497862138e-06, "loss": 0.6837, "step": 3805 }, { "epoch": 1.0499310344827586, "grad_norm": 4.151558876037598, "learning_rate": 9.316684678960673e-06, "loss": 0.7898, "step": 3806 }, { "epoch": 1.0502068965517242, "grad_norm": 3.81597900390625, "learning_rate": 9.316224235590733e-06, "loss": 0.7059, "step": 3807 }, { "epoch": 1.0504827586206897, "grad_norm": 4.277355194091797, "learning_rate": 9.315763648526894e-06, "loss": 0.7997, "step": 3808 }, { "epoch": 1.050758620689655, "grad_norm": 4.126785755157471, "learning_rate": 9.315302917784485e-06, "loss": 0.813, "step": 3809 }, { "epoch": 1.0510344827586207, "grad_norm": 3.917470932006836, "learning_rate": 9.314842043378849e-06, "loss": 0.7822, "step": 3810 }, { "epoch": 1.0513103448275862, "grad_norm": 4.125871658325195, "learning_rate": 9.314381025325326e-06, "loss": 0.8245, "step": 3811 }, { "epoch": 1.0515862068965518, "grad_norm": 4.401699066162109, "learning_rate": 9.313919863639265e-06, "loss": 0.9802, "step": 3812 }, { "epoch": 1.0518620689655171, "grad_norm": 3.553952693939209, "learning_rate": 9.31345855833602e-06, "loss": 0.7406, "step": 3813 }, { "epoch": 1.0521379310344827, "grad_norm": 3.456374406814575, "learning_rate": 9.312997109430945e-06, "loss": 0.7563, "step": 3814 }, { "epoch": 1.0524137931034483, "grad_norm": 3.9821603298187256, "learning_rate": 9.312535516939407e-06, "loss": 0.9592, "step": 3815 }, { "epoch": 1.0526896551724139, "grad_norm": 3.5769858360290527, "learning_rate": 9.31207378087677e-06, "loss": 0.6839, "step": 3816 }, { "epoch": 1.0529655172413792, "grad_norm": 4.134587287902832, "learning_rate": 9.311611901258407e-06, "loss": 0.9253, "step": 3817 }, { "epoch": 1.0532413793103448, "grad_norm": 3.828528881072998, "learning_rate": 9.311149878099695e-06, "loss": 0.8033, "step": 3818 }, { "epoch": 1.0535172413793104, "grad_norm": 3.6895787715911865, "learning_rate": 9.310687711416015e-06, "loss": 0.8122, "step": 3819 }, { "epoch": 1.053793103448276, "grad_norm": 3.7307639122009277, "learning_rate": 9.310225401222752e-06, "loss": 0.8149, "step": 3820 }, { "epoch": 1.0540689655172413, "grad_norm": 4.028548717498779, "learning_rate": 9.309762947535298e-06, "loss": 0.8464, "step": 3821 }, { "epoch": 1.0543448275862068, "grad_norm": 3.5895113945007324, "learning_rate": 9.309300350369052e-06, "loss": 0.7933, "step": 3822 }, { "epoch": 1.0546206896551724, "grad_norm": 3.849513053894043, "learning_rate": 9.308837609739409e-06, "loss": 0.8074, "step": 3823 }, { "epoch": 1.054896551724138, "grad_norm": 3.632395029067993, "learning_rate": 9.308374725661779e-06, "loss": 0.758, "step": 3824 }, { "epoch": 1.0551724137931036, "grad_norm": 3.617055654525757, "learning_rate": 9.307911698151568e-06, "loss": 0.7404, "step": 3825 }, { "epoch": 1.055448275862069, "grad_norm": 3.947826623916626, "learning_rate": 9.307448527224194e-06, "loss": 0.7387, "step": 3826 }, { "epoch": 1.0557241379310345, "grad_norm": 4.347990036010742, "learning_rate": 9.306985212895077e-06, "loss": 0.8881, "step": 3827 }, { "epoch": 1.056, "grad_norm": 3.801884412765503, "learning_rate": 9.30652175517964e-06, "loss": 0.8321, "step": 3828 }, { "epoch": 1.0562758620689656, "grad_norm": 3.982815980911255, "learning_rate": 9.306058154093312e-06, "loss": 0.7118, "step": 3829 }, { "epoch": 1.056551724137931, "grad_norm": 4.297782897949219, "learning_rate": 9.305594409651527e-06, "loss": 0.7834, "step": 3830 }, { "epoch": 1.0568275862068965, "grad_norm": 3.620546579360962, "learning_rate": 9.305130521869723e-06, "loss": 0.7512, "step": 3831 }, { "epoch": 1.0571034482758621, "grad_norm": 3.9674599170684814, "learning_rate": 9.304666490763347e-06, "loss": 0.7438, "step": 3832 }, { "epoch": 1.0573793103448277, "grad_norm": 3.769731044769287, "learning_rate": 9.304202316347846e-06, "loss": 0.7232, "step": 3833 }, { "epoch": 1.057655172413793, "grad_norm": 4.033661365509033, "learning_rate": 9.303737998638672e-06, "loss": 0.7647, "step": 3834 }, { "epoch": 1.0579310344827586, "grad_norm": 3.5498790740966797, "learning_rate": 9.30327353765128e-06, "loss": 0.632, "step": 3835 }, { "epoch": 1.0582068965517242, "grad_norm": 4.180737018585205, "learning_rate": 9.30280893340114e-06, "loss": 0.7902, "step": 3836 }, { "epoch": 1.0584827586206897, "grad_norm": 4.222261905670166, "learning_rate": 9.302344185903713e-06, "loss": 0.7495, "step": 3837 }, { "epoch": 1.058758620689655, "grad_norm": 4.580286502838135, "learning_rate": 9.301879295174472e-06, "loss": 0.9402, "step": 3838 }, { "epoch": 1.0590344827586207, "grad_norm": 3.7510507106781006, "learning_rate": 9.301414261228897e-06, "loss": 0.8204, "step": 3839 }, { "epoch": 1.0593103448275862, "grad_norm": 3.7810473442077637, "learning_rate": 9.300949084082469e-06, "loss": 0.7213, "step": 3840 }, { "epoch": 1.0595862068965518, "grad_norm": 4.348148822784424, "learning_rate": 9.300483763750673e-06, "loss": 0.8389, "step": 3841 }, { "epoch": 1.0598620689655172, "grad_norm": 3.912891149520874, "learning_rate": 9.300018300248999e-06, "loss": 0.7009, "step": 3842 }, { "epoch": 1.0601379310344827, "grad_norm": 4.0483503341674805, "learning_rate": 9.299552693592946e-06, "loss": 0.7935, "step": 3843 }, { "epoch": 1.0604137931034483, "grad_norm": 4.573018550872803, "learning_rate": 9.299086943798012e-06, "loss": 0.8104, "step": 3844 }, { "epoch": 1.0606896551724139, "grad_norm": 4.052532196044922, "learning_rate": 9.298621050879706e-06, "loss": 0.7804, "step": 3845 }, { "epoch": 1.0609655172413792, "grad_norm": 3.8262879848480225, "learning_rate": 9.298155014853536e-06, "loss": 0.906, "step": 3846 }, { "epoch": 1.0612413793103448, "grad_norm": 3.5153212547302246, "learning_rate": 9.297688835735018e-06, "loss": 0.7137, "step": 3847 }, { "epoch": 1.0615172413793104, "grad_norm": 3.821439504623413, "learning_rate": 9.29722251353967e-06, "loss": 0.8986, "step": 3848 }, { "epoch": 1.061793103448276, "grad_norm": 3.915506601333618, "learning_rate": 9.29675604828302e-06, "loss": 0.7613, "step": 3849 }, { "epoch": 1.0620689655172413, "grad_norm": 3.6492016315460205, "learning_rate": 9.296289439980593e-06, "loss": 0.7947, "step": 3850 }, { "epoch": 1.0623448275862069, "grad_norm": 3.88242769241333, "learning_rate": 9.295822688647927e-06, "loss": 0.8205, "step": 3851 }, { "epoch": 1.0626206896551724, "grad_norm": 4.372849941253662, "learning_rate": 9.295355794300558e-06, "loss": 0.9099, "step": 3852 }, { "epoch": 1.062896551724138, "grad_norm": 3.8354198932647705, "learning_rate": 9.294888756954031e-06, "loss": 0.7458, "step": 3853 }, { "epoch": 1.0631724137931036, "grad_norm": 4.550748825073242, "learning_rate": 9.294421576623895e-06, "loss": 0.9169, "step": 3854 }, { "epoch": 1.063448275862069, "grad_norm": 4.642521858215332, "learning_rate": 9.293954253325703e-06, "loss": 0.7729, "step": 3855 }, { "epoch": 1.0637241379310345, "grad_norm": 4.160675048828125, "learning_rate": 9.29348678707501e-06, "loss": 0.8906, "step": 3856 }, { "epoch": 1.064, "grad_norm": 3.9924724102020264, "learning_rate": 9.293019177887384e-06, "loss": 0.8367, "step": 3857 }, { "epoch": 1.0642758620689654, "grad_norm": 4.071469306945801, "learning_rate": 9.29255142577839e-06, "loss": 0.8049, "step": 3858 }, { "epoch": 1.064551724137931, "grad_norm": 3.8044791221618652, "learning_rate": 9.292083530763597e-06, "loss": 0.9148, "step": 3859 }, { "epoch": 1.0648275862068965, "grad_norm": 4.180686950683594, "learning_rate": 9.291615492858586e-06, "loss": 0.7769, "step": 3860 }, { "epoch": 1.0651034482758621, "grad_norm": 4.33135986328125, "learning_rate": 9.291147312078937e-06, "loss": 0.9172, "step": 3861 }, { "epoch": 1.0653793103448277, "grad_norm": 3.721806526184082, "learning_rate": 9.290678988440236e-06, "loss": 0.857, "step": 3862 }, { "epoch": 1.065655172413793, "grad_norm": 3.6510696411132812, "learning_rate": 9.290210521958077e-06, "loss": 0.83, "step": 3863 }, { "epoch": 1.0659310344827586, "grad_norm": 3.488189458847046, "learning_rate": 9.289741912648054e-06, "loss": 0.7407, "step": 3864 }, { "epoch": 1.0662068965517242, "grad_norm": 4.080086708068848, "learning_rate": 9.289273160525768e-06, "loss": 0.9087, "step": 3865 }, { "epoch": 1.0664827586206898, "grad_norm": 3.8889107704162598, "learning_rate": 9.288804265606824e-06, "loss": 0.8162, "step": 3866 }, { "epoch": 1.066758620689655, "grad_norm": 4.254446983337402, "learning_rate": 9.288335227906833e-06, "loss": 0.8181, "step": 3867 }, { "epoch": 1.0670344827586207, "grad_norm": 4.250031471252441, "learning_rate": 9.287866047441409e-06, "loss": 0.7838, "step": 3868 }, { "epoch": 1.0673103448275862, "grad_norm": 4.142595291137695, "learning_rate": 9.287396724226173e-06, "loss": 0.8514, "step": 3869 }, { "epoch": 1.0675862068965518, "grad_norm": 3.924983024597168, "learning_rate": 9.286927258276747e-06, "loss": 0.6869, "step": 3870 }, { "epoch": 1.0678620689655172, "grad_norm": 4.302878379821777, "learning_rate": 9.286457649608765e-06, "loss": 0.7578, "step": 3871 }, { "epoch": 1.0681379310344827, "grad_norm": 3.9749202728271484, "learning_rate": 9.285987898237856e-06, "loss": 0.8727, "step": 3872 }, { "epoch": 1.0684137931034483, "grad_norm": 4.164331436157227, "learning_rate": 9.28551800417966e-06, "loss": 0.8664, "step": 3873 }, { "epoch": 1.0686896551724139, "grad_norm": 4.000192165374756, "learning_rate": 9.285047967449825e-06, "loss": 0.8146, "step": 3874 }, { "epoch": 1.0689655172413792, "grad_norm": 3.5307774543762207, "learning_rate": 9.284577788063994e-06, "loss": 0.8173, "step": 3875 }, { "epoch": 1.0692413793103448, "grad_norm": 3.95241379737854, "learning_rate": 9.284107466037821e-06, "loss": 0.8554, "step": 3876 }, { "epoch": 1.0695172413793104, "grad_norm": 3.8924434185028076, "learning_rate": 9.283637001386966e-06, "loss": 0.7486, "step": 3877 }, { "epoch": 1.069793103448276, "grad_norm": 4.422730922698975, "learning_rate": 9.283166394127088e-06, "loss": 0.7749, "step": 3878 }, { "epoch": 1.0700689655172413, "grad_norm": 3.8322665691375732, "learning_rate": 9.282695644273858e-06, "loss": 0.8534, "step": 3879 }, { "epoch": 1.0703448275862069, "grad_norm": 4.250410556793213, "learning_rate": 9.282224751842946e-06, "loss": 0.8719, "step": 3880 }, { "epoch": 1.0706206896551724, "grad_norm": 3.991304397583008, "learning_rate": 9.281753716850029e-06, "loss": 0.7919, "step": 3881 }, { "epoch": 1.070896551724138, "grad_norm": 3.8701164722442627, "learning_rate": 9.281282539310788e-06, "loss": 0.8128, "step": 3882 }, { "epoch": 1.0711724137931034, "grad_norm": 3.967442750930786, "learning_rate": 9.28081121924091e-06, "loss": 0.8253, "step": 3883 }, { "epoch": 1.071448275862069, "grad_norm": 3.9456751346588135, "learning_rate": 9.280339756656085e-06, "loss": 0.7808, "step": 3884 }, { "epoch": 1.0717241379310345, "grad_norm": 3.8933582305908203, "learning_rate": 9.27986815157201e-06, "loss": 0.8041, "step": 3885 }, { "epoch": 1.072, "grad_norm": 3.8139524459838867, "learning_rate": 9.279396404004386e-06, "loss": 0.9479, "step": 3886 }, { "epoch": 1.0722758620689654, "grad_norm": 4.413954257965088, "learning_rate": 9.278924513968917e-06, "loss": 0.8866, "step": 3887 }, { "epoch": 1.072551724137931, "grad_norm": 3.836292266845703, "learning_rate": 9.278452481481311e-06, "loss": 0.7135, "step": 3888 }, { "epoch": 1.0728275862068966, "grad_norm": 3.9205856323242188, "learning_rate": 9.277980306557285e-06, "loss": 0.8714, "step": 3889 }, { "epoch": 1.0731034482758621, "grad_norm": 4.429620742797852, "learning_rate": 9.27750798921256e-06, "loss": 0.7431, "step": 3890 }, { "epoch": 1.0733793103448277, "grad_norm": 4.0762176513671875, "learning_rate": 9.277035529462859e-06, "loss": 0.7439, "step": 3891 }, { "epoch": 1.073655172413793, "grad_norm": 3.979285955429077, "learning_rate": 9.276562927323908e-06, "loss": 0.7919, "step": 3892 }, { "epoch": 1.0739310344827586, "grad_norm": 3.990755796432495, "learning_rate": 9.276090182811445e-06, "loss": 0.8458, "step": 3893 }, { "epoch": 1.0742068965517242, "grad_norm": 3.8359603881835938, "learning_rate": 9.275617295941206e-06, "loss": 0.7954, "step": 3894 }, { "epoch": 1.0744827586206895, "grad_norm": 3.9644832611083984, "learning_rate": 9.275144266728934e-06, "loss": 0.8823, "step": 3895 }, { "epoch": 1.074758620689655, "grad_norm": 3.4729299545288086, "learning_rate": 9.27467109519038e-06, "loss": 0.7211, "step": 3896 }, { "epoch": 1.0750344827586207, "grad_norm": 4.290532112121582, "learning_rate": 9.274197781341292e-06, "loss": 0.7515, "step": 3897 }, { "epoch": 1.0753103448275863, "grad_norm": 3.617082118988037, "learning_rate": 9.273724325197429e-06, "loss": 0.6298, "step": 3898 }, { "epoch": 1.0755862068965518, "grad_norm": 4.210318565368652, "learning_rate": 9.273250726774555e-06, "loss": 0.7453, "step": 3899 }, { "epoch": 1.0758620689655172, "grad_norm": 3.9012420177459717, "learning_rate": 9.272776986088435e-06, "loss": 0.8234, "step": 3900 }, { "epoch": 1.0761379310344827, "grad_norm": 3.8616268634796143, "learning_rate": 9.27230310315484e-06, "loss": 0.7749, "step": 3901 }, { "epoch": 1.0764137931034483, "grad_norm": 4.099451065063477, "learning_rate": 9.271829077989546e-06, "loss": 0.7273, "step": 3902 }, { "epoch": 1.0766896551724139, "grad_norm": 3.7536914348602295, "learning_rate": 9.271354910608338e-06, "loss": 0.6555, "step": 3903 }, { "epoch": 1.0769655172413792, "grad_norm": 3.928684711456299, "learning_rate": 9.270880601027e-06, "loss": 0.6732, "step": 3904 }, { "epoch": 1.0772413793103448, "grad_norm": 3.1190848350524902, "learning_rate": 9.27040614926132e-06, "loss": 0.6822, "step": 3905 }, { "epoch": 1.0775172413793104, "grad_norm": 3.562662363052368, "learning_rate": 9.269931555327095e-06, "loss": 0.8056, "step": 3906 }, { "epoch": 1.077793103448276, "grad_norm": 4.176436901092529, "learning_rate": 9.269456819240125e-06, "loss": 0.9013, "step": 3907 }, { "epoch": 1.0780689655172413, "grad_norm": 4.0064215660095215, "learning_rate": 9.268981941016213e-06, "loss": 0.7922, "step": 3908 }, { "epoch": 1.0783448275862069, "grad_norm": 3.3916735649108887, "learning_rate": 9.268506920671172e-06, "loss": 0.799, "step": 3909 }, { "epoch": 1.0786206896551724, "grad_norm": 4.023165702819824, "learning_rate": 9.268031758220814e-06, "loss": 0.86, "step": 3910 }, { "epoch": 1.078896551724138, "grad_norm": 3.7339751720428467, "learning_rate": 9.267556453680957e-06, "loss": 0.7482, "step": 3911 }, { "epoch": 1.0791724137931034, "grad_norm": 3.717040538787842, "learning_rate": 9.267081007067427e-06, "loss": 0.7715, "step": 3912 }, { "epoch": 1.079448275862069, "grad_norm": 3.809201955795288, "learning_rate": 9.26660541839605e-06, "loss": 0.7365, "step": 3913 }, { "epoch": 1.0797241379310345, "grad_norm": 3.650033712387085, "learning_rate": 9.266129687682662e-06, "loss": 0.8828, "step": 3914 }, { "epoch": 1.08, "grad_norm": 4.369842529296875, "learning_rate": 9.265653814943099e-06, "loss": 0.8265, "step": 3915 }, { "epoch": 1.0802758620689654, "grad_norm": 3.9689269065856934, "learning_rate": 9.265177800193202e-06, "loss": 0.7986, "step": 3916 }, { "epoch": 1.080551724137931, "grad_norm": 4.015738487243652, "learning_rate": 9.26470164344882e-06, "loss": 0.708, "step": 3917 }, { "epoch": 1.0808275862068966, "grad_norm": 4.044905185699463, "learning_rate": 9.264225344725806e-06, "loss": 0.7349, "step": 3918 }, { "epoch": 1.0811034482758621, "grad_norm": 3.701840400695801, "learning_rate": 9.263748904040015e-06, "loss": 0.673, "step": 3919 }, { "epoch": 1.0813793103448275, "grad_norm": 3.6245696544647217, "learning_rate": 9.263272321407309e-06, "loss": 0.7578, "step": 3920 }, { "epoch": 1.081655172413793, "grad_norm": 3.7602291107177734, "learning_rate": 9.262795596843555e-06, "loss": 0.7112, "step": 3921 }, { "epoch": 1.0819310344827586, "grad_norm": 4.412290573120117, "learning_rate": 9.262318730364622e-06, "loss": 0.8839, "step": 3922 }, { "epoch": 1.0822068965517242, "grad_norm": 4.042190074920654, "learning_rate": 9.261841721986387e-06, "loss": 0.7903, "step": 3923 }, { "epoch": 1.0824827586206895, "grad_norm": 3.872708559036255, "learning_rate": 9.261364571724731e-06, "loss": 0.7384, "step": 3924 }, { "epoch": 1.0827586206896551, "grad_norm": 4.0865278244018555, "learning_rate": 9.260887279595536e-06, "loss": 0.7973, "step": 3925 }, { "epoch": 1.0830344827586207, "grad_norm": 3.8247015476226807, "learning_rate": 9.260409845614697e-06, "loss": 0.8569, "step": 3926 }, { "epoch": 1.0833103448275863, "grad_norm": 3.829880475997925, "learning_rate": 9.259932269798105e-06, "loss": 0.7905, "step": 3927 }, { "epoch": 1.0835862068965518, "grad_norm": 4.356420040130615, "learning_rate": 9.259454552161658e-06, "loss": 0.8884, "step": 3928 }, { "epoch": 1.0838620689655172, "grad_norm": 3.950687885284424, "learning_rate": 9.258976692721262e-06, "loss": 0.7623, "step": 3929 }, { "epoch": 1.0841379310344827, "grad_norm": 3.749335765838623, "learning_rate": 9.258498691492828e-06, "loss": 0.7573, "step": 3930 }, { "epoch": 1.0844137931034483, "grad_norm": 3.7202436923980713, "learning_rate": 9.258020548492266e-06, "loss": 0.7072, "step": 3931 }, { "epoch": 1.084689655172414, "grad_norm": 3.6727797985076904, "learning_rate": 9.257542263735493e-06, "loss": 0.876, "step": 3932 }, { "epoch": 1.0849655172413792, "grad_norm": 3.6783134937286377, "learning_rate": 9.257063837238436e-06, "loss": 0.7951, "step": 3933 }, { "epoch": 1.0852413793103448, "grad_norm": 3.628054141998291, "learning_rate": 9.25658526901702e-06, "loss": 0.7374, "step": 3934 }, { "epoch": 1.0855172413793104, "grad_norm": 3.8953053951263428, "learning_rate": 9.256106559087177e-06, "loss": 0.7742, "step": 3935 }, { "epoch": 1.085793103448276, "grad_norm": 4.161144733428955, "learning_rate": 9.255627707464846e-06, "loss": 0.7434, "step": 3936 }, { "epoch": 1.0860689655172413, "grad_norm": 3.6481008529663086, "learning_rate": 9.255148714165967e-06, "loss": 0.7752, "step": 3937 }, { "epoch": 1.0863448275862069, "grad_norm": 4.315282821655273, "learning_rate": 9.254669579206486e-06, "loss": 0.8194, "step": 3938 }, { "epoch": 1.0866206896551724, "grad_norm": 3.5578577518463135, "learning_rate": 9.254190302602356e-06, "loss": 0.7124, "step": 3939 }, { "epoch": 1.086896551724138, "grad_norm": 4.339129447937012, "learning_rate": 9.253710884369533e-06, "loss": 0.7874, "step": 3940 }, { "epoch": 1.0871724137931034, "grad_norm": 4.262515068054199, "learning_rate": 9.253231324523975e-06, "loss": 0.8021, "step": 3941 }, { "epoch": 1.087448275862069, "grad_norm": 4.279687881469727, "learning_rate": 9.252751623081649e-06, "loss": 0.8858, "step": 3942 }, { "epoch": 1.0877241379310345, "grad_norm": 4.067002773284912, "learning_rate": 9.252271780058525e-06, "loss": 0.7042, "step": 3943 }, { "epoch": 1.088, "grad_norm": 3.9525339603424072, "learning_rate": 9.251791795470578e-06, "loss": 0.7396, "step": 3944 }, { "epoch": 1.0882758620689654, "grad_norm": 3.735281229019165, "learning_rate": 9.251311669333787e-06, "loss": 0.7667, "step": 3945 }, { "epoch": 1.088551724137931, "grad_norm": 4.003368377685547, "learning_rate": 9.250831401664136e-06, "loss": 0.9404, "step": 3946 }, { "epoch": 1.0888275862068966, "grad_norm": 3.970507860183716, "learning_rate": 9.250350992477615e-06, "loss": 0.8654, "step": 3947 }, { "epoch": 1.0891034482758621, "grad_norm": 3.7723052501678467, "learning_rate": 9.249870441790214e-06, "loss": 0.7862, "step": 3948 }, { "epoch": 1.0893793103448275, "grad_norm": 3.642833709716797, "learning_rate": 9.249389749617936e-06, "loss": 0.7938, "step": 3949 }, { "epoch": 1.089655172413793, "grad_norm": 3.590127944946289, "learning_rate": 9.248908915976781e-06, "loss": 0.7232, "step": 3950 }, { "epoch": 1.0899310344827586, "grad_norm": 3.9044201374053955, "learning_rate": 9.24842794088276e-06, "loss": 0.7716, "step": 3951 }, { "epoch": 1.0902068965517242, "grad_norm": 4.1040263175964355, "learning_rate": 9.24794682435188e-06, "loss": 0.7054, "step": 3952 }, { "epoch": 1.0904827586206896, "grad_norm": 3.850667953491211, "learning_rate": 9.247465566400163e-06, "loss": 0.8169, "step": 3953 }, { "epoch": 1.0907586206896551, "grad_norm": 3.7181472778320312, "learning_rate": 9.24698416704363e-06, "loss": 0.7548, "step": 3954 }, { "epoch": 1.0910344827586207, "grad_norm": 3.942866325378418, "learning_rate": 9.246502626298303e-06, "loss": 0.8291, "step": 3955 }, { "epoch": 1.0913103448275863, "grad_norm": 3.61964750289917, "learning_rate": 9.24602094418022e-06, "loss": 0.8087, "step": 3956 }, { "epoch": 1.0915862068965518, "grad_norm": 4.162998199462891, "learning_rate": 9.245539120705414e-06, "loss": 0.7893, "step": 3957 }, { "epoch": 1.0918620689655172, "grad_norm": 4.0708160400390625, "learning_rate": 9.245057155889922e-06, "loss": 0.822, "step": 3958 }, { "epoch": 1.0921379310344828, "grad_norm": 3.965134859085083, "learning_rate": 9.244575049749795e-06, "loss": 0.842, "step": 3959 }, { "epoch": 1.0924137931034483, "grad_norm": 4.042157173156738, "learning_rate": 9.244092802301081e-06, "loss": 0.8879, "step": 3960 }, { "epoch": 1.0926896551724137, "grad_norm": 3.6985621452331543, "learning_rate": 9.243610413559837e-06, "loss": 0.802, "step": 3961 }, { "epoch": 1.0929655172413792, "grad_norm": 4.046943664550781, "learning_rate": 9.243127883542116e-06, "loss": 0.877, "step": 3962 }, { "epoch": 1.0932413793103448, "grad_norm": 3.7988991737365723, "learning_rate": 9.242645212263988e-06, "loss": 0.7967, "step": 3963 }, { "epoch": 1.0935172413793104, "grad_norm": 3.5659453868865967, "learning_rate": 9.24216239974152e-06, "loss": 0.7865, "step": 3964 }, { "epoch": 1.093793103448276, "grad_norm": 4.42900276184082, "learning_rate": 9.241679445990786e-06, "loss": 0.9658, "step": 3965 }, { "epoch": 1.0940689655172413, "grad_norm": 3.814695119857788, "learning_rate": 9.241196351027863e-06, "loss": 0.8846, "step": 3966 }, { "epoch": 1.0943448275862069, "grad_norm": 3.975876808166504, "learning_rate": 9.240713114868837e-06, "loss": 0.7386, "step": 3967 }, { "epoch": 1.0946206896551725, "grad_norm": 4.154804706573486, "learning_rate": 9.240229737529793e-06, "loss": 0.8563, "step": 3968 }, { "epoch": 1.094896551724138, "grad_norm": 3.8196969032287598, "learning_rate": 9.239746219026825e-06, "loss": 0.6948, "step": 3969 }, { "epoch": 1.0951724137931034, "grad_norm": 4.010246276855469, "learning_rate": 9.239262559376028e-06, "loss": 0.8122, "step": 3970 }, { "epoch": 1.095448275862069, "grad_norm": 3.868546724319458, "learning_rate": 9.238778758593506e-06, "loss": 0.7665, "step": 3971 }, { "epoch": 1.0957241379310345, "grad_norm": 4.358020782470703, "learning_rate": 9.238294816695362e-06, "loss": 0.8461, "step": 3972 }, { "epoch": 1.096, "grad_norm": 3.5022542476654053, "learning_rate": 9.237810733697712e-06, "loss": 0.7896, "step": 3973 }, { "epoch": 1.0962758620689654, "grad_norm": 4.07645845413208, "learning_rate": 9.237326509616668e-06, "loss": 0.771, "step": 3974 }, { "epoch": 1.096551724137931, "grad_norm": 4.058377265930176, "learning_rate": 9.236842144468355e-06, "loss": 0.6944, "step": 3975 }, { "epoch": 1.0968275862068966, "grad_norm": 4.261073112487793, "learning_rate": 9.236357638268893e-06, "loss": 0.8408, "step": 3976 }, { "epoch": 1.0971034482758621, "grad_norm": 3.849898338317871, "learning_rate": 9.235872991034416e-06, "loss": 0.7857, "step": 3977 }, { "epoch": 1.0973793103448275, "grad_norm": 3.726483106613159, "learning_rate": 9.235388202781057e-06, "loss": 0.8182, "step": 3978 }, { "epoch": 1.097655172413793, "grad_norm": 3.924131155014038, "learning_rate": 9.234903273524955e-06, "loss": 0.7087, "step": 3979 }, { "epoch": 1.0979310344827586, "grad_norm": 3.9249701499938965, "learning_rate": 9.234418203282254e-06, "loss": 0.802, "step": 3980 }, { "epoch": 1.0982068965517242, "grad_norm": 4.027190208435059, "learning_rate": 9.233932992069104e-06, "loss": 0.7652, "step": 3981 }, { "epoch": 1.0984827586206896, "grad_norm": 3.5720791816711426, "learning_rate": 9.233447639901657e-06, "loss": 0.7944, "step": 3982 }, { "epoch": 1.0987586206896551, "grad_norm": 3.8005223274230957, "learning_rate": 9.232962146796072e-06, "loss": 0.7746, "step": 3983 }, { "epoch": 1.0990344827586207, "grad_norm": 3.6721315383911133, "learning_rate": 9.232476512768513e-06, "loss": 0.7605, "step": 3984 }, { "epoch": 1.0993103448275863, "grad_norm": 3.597531318664551, "learning_rate": 9.231990737835145e-06, "loss": 0.7299, "step": 3985 }, { "epoch": 1.0995862068965516, "grad_norm": 3.701545476913452, "learning_rate": 9.231504822012142e-06, "loss": 0.6734, "step": 3986 }, { "epoch": 1.0998620689655172, "grad_norm": 3.759495973587036, "learning_rate": 9.23101876531568e-06, "loss": 0.8896, "step": 3987 }, { "epoch": 1.1001379310344828, "grad_norm": 3.934662103652954, "learning_rate": 9.230532567761941e-06, "loss": 0.8792, "step": 3988 }, { "epoch": 1.1004137931034483, "grad_norm": 3.856515884399414, "learning_rate": 9.23004622936711e-06, "loss": 0.8039, "step": 3989 }, { "epoch": 1.1006896551724137, "grad_norm": 3.721714973449707, "learning_rate": 9.22955975014738e-06, "loss": 0.7788, "step": 3990 }, { "epoch": 1.1009655172413793, "grad_norm": 3.6748909950256348, "learning_rate": 9.229073130118949e-06, "loss": 0.7026, "step": 3991 }, { "epoch": 1.1012413793103448, "grad_norm": 3.6838061809539795, "learning_rate": 9.22858636929801e-06, "loss": 0.7323, "step": 3992 }, { "epoch": 1.1015172413793104, "grad_norm": 3.82231068611145, "learning_rate": 9.228099467700773e-06, "loss": 0.8569, "step": 3993 }, { "epoch": 1.101793103448276, "grad_norm": 4.277632236480713, "learning_rate": 9.227612425343448e-06, "loss": 0.9376, "step": 3994 }, { "epoch": 1.1020689655172413, "grad_norm": 3.8770334720611572, "learning_rate": 9.227125242242248e-06, "loss": 0.7651, "step": 3995 }, { "epoch": 1.1023448275862069, "grad_norm": 3.675358533859253, "learning_rate": 9.226637918413393e-06, "loss": 0.6399, "step": 3996 }, { "epoch": 1.1026206896551725, "grad_norm": 3.777207851409912, "learning_rate": 9.226150453873104e-06, "loss": 0.6992, "step": 3997 }, { "epoch": 1.102896551724138, "grad_norm": 3.927715539932251, "learning_rate": 9.225662848637614e-06, "loss": 0.8501, "step": 3998 }, { "epoch": 1.1031724137931034, "grad_norm": 4.100836753845215, "learning_rate": 9.225175102723154e-06, "loss": 0.9011, "step": 3999 }, { "epoch": 1.103448275862069, "grad_norm": 3.6274514198303223, "learning_rate": 9.22468721614596e-06, "loss": 0.7203, "step": 4000 }, { "epoch": 1.103448275862069, "eval_loss": 1.2888739109039307, "eval_runtime": 13.8084, "eval_samples_per_second": 28.968, "eval_steps_per_second": 3.621, "step": 4000 }, { "epoch": 1.1037241379310345, "grad_norm": 3.762080669403076, "learning_rate": 9.224199188922279e-06, "loss": 0.5495, "step": 4001 }, { "epoch": 1.104, "grad_norm": 3.775116205215454, "learning_rate": 9.223711021068352e-06, "loss": 0.7192, "step": 4002 }, { "epoch": 1.1042758620689654, "grad_norm": 3.9552791118621826, "learning_rate": 9.223222712600435e-06, "loss": 0.7847, "step": 4003 }, { "epoch": 1.104551724137931, "grad_norm": 3.6374459266662598, "learning_rate": 9.222734263534785e-06, "loss": 0.8025, "step": 4004 }, { "epoch": 1.1048275862068966, "grad_norm": 4.182751655578613, "learning_rate": 9.222245673887662e-06, "loss": 0.7499, "step": 4005 }, { "epoch": 1.1051034482758622, "grad_norm": 4.258684158325195, "learning_rate": 9.22175694367533e-06, "loss": 0.7986, "step": 4006 }, { "epoch": 1.1053793103448275, "grad_norm": 4.028139114379883, "learning_rate": 9.221268072914063e-06, "loss": 0.775, "step": 4007 }, { "epoch": 1.105655172413793, "grad_norm": 4.359968185424805, "learning_rate": 9.220779061620135e-06, "loss": 0.8468, "step": 4008 }, { "epoch": 1.1059310344827586, "grad_norm": 3.7817747592926025, "learning_rate": 9.220289909809827e-06, "loss": 0.7181, "step": 4009 }, { "epoch": 1.1062068965517242, "grad_norm": 4.217072010040283, "learning_rate": 9.21980061749942e-06, "loss": 0.79, "step": 4010 }, { "epoch": 1.1064827586206896, "grad_norm": 4.225046157836914, "learning_rate": 9.219311184705206e-06, "loss": 0.9071, "step": 4011 }, { "epoch": 1.1067586206896551, "grad_norm": 3.6321213245391846, "learning_rate": 9.21882161144348e-06, "loss": 0.8133, "step": 4012 }, { "epoch": 1.1070344827586207, "grad_norm": 4.032390117645264, "learning_rate": 9.21833189773054e-06, "loss": 0.7472, "step": 4013 }, { "epoch": 1.1073103448275863, "grad_norm": 4.112706661224365, "learning_rate": 9.217842043582688e-06, "loss": 0.8093, "step": 4014 }, { "epoch": 1.1075862068965516, "grad_norm": 3.389960765838623, "learning_rate": 9.217352049016231e-06, "loss": 0.8022, "step": 4015 }, { "epoch": 1.1078620689655172, "grad_norm": 3.9739553928375244, "learning_rate": 9.216861914047485e-06, "loss": 0.8277, "step": 4016 }, { "epoch": 1.1081379310344828, "grad_norm": 3.882812976837158, "learning_rate": 9.216371638692766e-06, "loss": 0.7751, "step": 4017 }, { "epoch": 1.1084137931034483, "grad_norm": 3.7135376930236816, "learning_rate": 9.215881222968395e-06, "loss": 0.8678, "step": 4018 }, { "epoch": 1.1086896551724137, "grad_norm": 3.294532299041748, "learning_rate": 9.2153906668907e-06, "loss": 0.785, "step": 4019 }, { "epoch": 1.1089655172413793, "grad_norm": 4.133664131164551, "learning_rate": 9.214899970476012e-06, "loss": 0.8279, "step": 4020 }, { "epoch": 1.1092413793103448, "grad_norm": 3.8655340671539307, "learning_rate": 9.214409133740667e-06, "loss": 0.8857, "step": 4021 }, { "epoch": 1.1095172413793104, "grad_norm": 3.605602741241455, "learning_rate": 9.213918156701005e-06, "loss": 0.657, "step": 4022 }, { "epoch": 1.109793103448276, "grad_norm": 3.6237566471099854, "learning_rate": 9.213427039373372e-06, "loss": 0.6237, "step": 4023 }, { "epoch": 1.1100689655172413, "grad_norm": 4.057729244232178, "learning_rate": 9.21293578177412e-06, "loss": 0.8807, "step": 4024 }, { "epoch": 1.110344827586207, "grad_norm": 3.4182333946228027, "learning_rate": 9.212444383919601e-06, "loss": 0.7375, "step": 4025 }, { "epoch": 1.1106206896551725, "grad_norm": 4.285341739654541, "learning_rate": 9.211952845826176e-06, "loss": 0.8658, "step": 4026 }, { "epoch": 1.1108965517241378, "grad_norm": 4.267273902893066, "learning_rate": 9.211461167510208e-06, "loss": 0.8018, "step": 4027 }, { "epoch": 1.1111724137931034, "grad_norm": 4.286179065704346, "learning_rate": 9.210969348988065e-06, "loss": 0.8926, "step": 4028 }, { "epoch": 1.111448275862069, "grad_norm": 3.5182533264160156, "learning_rate": 9.210477390276122e-06, "loss": 0.8115, "step": 4029 }, { "epoch": 1.1117241379310345, "grad_norm": 4.031195163726807, "learning_rate": 9.209985291390757e-06, "loss": 0.7858, "step": 4030 }, { "epoch": 1.112, "grad_norm": 4.053535461425781, "learning_rate": 9.209493052348354e-06, "loss": 0.8093, "step": 4031 }, { "epoch": 1.1122758620689654, "grad_norm": 4.088314056396484, "learning_rate": 9.209000673165296e-06, "loss": 0.7363, "step": 4032 }, { "epoch": 1.112551724137931, "grad_norm": 4.207711696624756, "learning_rate": 9.208508153857979e-06, "loss": 0.7759, "step": 4033 }, { "epoch": 1.1128275862068966, "grad_norm": 4.359138488769531, "learning_rate": 9.2080154944428e-06, "loss": 0.8137, "step": 4034 }, { "epoch": 1.1131034482758622, "grad_norm": 3.8136789798736572, "learning_rate": 9.207522694936157e-06, "loss": 0.6873, "step": 4035 }, { "epoch": 1.1133793103448275, "grad_norm": 4.0134124755859375, "learning_rate": 9.20702975535446e-06, "loss": 0.7985, "step": 4036 }, { "epoch": 1.113655172413793, "grad_norm": 3.8299503326416016, "learning_rate": 9.206536675714116e-06, "loss": 0.7446, "step": 4037 }, { "epoch": 1.1139310344827587, "grad_norm": 3.91050386428833, "learning_rate": 9.206043456031543e-06, "loss": 0.7427, "step": 4038 }, { "epoch": 1.1142068965517242, "grad_norm": 3.8590610027313232, "learning_rate": 9.205550096323159e-06, "loss": 0.8362, "step": 4039 }, { "epoch": 1.1144827586206896, "grad_norm": 4.091381072998047, "learning_rate": 9.205056596605392e-06, "loss": 0.8381, "step": 4040 }, { "epoch": 1.1147586206896551, "grad_norm": 3.666677236557007, "learning_rate": 9.20456295689467e-06, "loss": 0.6844, "step": 4041 }, { "epoch": 1.1150344827586207, "grad_norm": 4.089267253875732, "learning_rate": 9.204069177207425e-06, "loss": 0.6622, "step": 4042 }, { "epoch": 1.1153103448275863, "grad_norm": 3.9876863956451416, "learning_rate": 9.203575257560096e-06, "loss": 0.8399, "step": 4043 }, { "epoch": 1.1155862068965516, "grad_norm": 3.8056256771087646, "learning_rate": 9.20308119796913e-06, "loss": 0.7756, "step": 4044 }, { "epoch": 1.1158620689655172, "grad_norm": 4.813003063201904, "learning_rate": 9.202586998450972e-06, "loss": 0.7024, "step": 4045 }, { "epoch": 1.1161379310344828, "grad_norm": 4.197500705718994, "learning_rate": 9.202092659022073e-06, "loss": 0.7331, "step": 4046 }, { "epoch": 1.1164137931034483, "grad_norm": 3.951809883117676, "learning_rate": 9.201598179698893e-06, "loss": 0.7865, "step": 4047 }, { "epoch": 1.1166896551724137, "grad_norm": 4.117329120635986, "learning_rate": 9.201103560497895e-06, "loss": 0.914, "step": 4048 }, { "epoch": 1.1169655172413793, "grad_norm": 4.391449451446533, "learning_rate": 9.200608801435544e-06, "loss": 0.8323, "step": 4049 }, { "epoch": 1.1172413793103448, "grad_norm": 3.8768510818481445, "learning_rate": 9.200113902528312e-06, "loss": 0.7524, "step": 4050 }, { "epoch": 1.1175172413793104, "grad_norm": 3.9872348308563232, "learning_rate": 9.199618863792675e-06, "loss": 0.7772, "step": 4051 }, { "epoch": 1.1177931034482758, "grad_norm": 4.5454230308532715, "learning_rate": 9.199123685245112e-06, "loss": 0.7441, "step": 4052 }, { "epoch": 1.1180689655172413, "grad_norm": 3.9309864044189453, "learning_rate": 9.19862836690211e-06, "loss": 0.7562, "step": 4053 }, { "epoch": 1.118344827586207, "grad_norm": 3.9888763427734375, "learning_rate": 9.198132908780157e-06, "loss": 0.8376, "step": 4054 }, { "epoch": 1.1186206896551725, "grad_norm": 3.8356428146362305, "learning_rate": 9.19763731089575e-06, "loss": 0.7559, "step": 4055 }, { "epoch": 1.1188965517241378, "grad_norm": 3.937666654586792, "learning_rate": 9.197141573265388e-06, "loss": 0.7453, "step": 4056 }, { "epoch": 1.1191724137931034, "grad_norm": 3.817838430404663, "learning_rate": 9.19664569590557e-06, "loss": 0.827, "step": 4057 }, { "epoch": 1.119448275862069, "grad_norm": 3.9607162475585938, "learning_rate": 9.196149678832811e-06, "loss": 0.8213, "step": 4058 }, { "epoch": 1.1197241379310345, "grad_norm": 4.008541107177734, "learning_rate": 9.195653522063623e-06, "loss": 0.7778, "step": 4059 }, { "epoch": 1.12, "grad_norm": 3.5605690479278564, "learning_rate": 9.195157225614522e-06, "loss": 0.8104, "step": 4060 }, { "epoch": 1.1202758620689655, "grad_norm": 3.8794021606445312, "learning_rate": 9.19466078950203e-06, "loss": 0.6694, "step": 4061 }, { "epoch": 1.120551724137931, "grad_norm": 3.8308568000793457, "learning_rate": 9.194164213742676e-06, "loss": 0.7382, "step": 4062 }, { "epoch": 1.1208275862068966, "grad_norm": 3.8035435676574707, "learning_rate": 9.193667498352991e-06, "loss": 0.712, "step": 4063 }, { "epoch": 1.1211034482758622, "grad_norm": 4.223592281341553, "learning_rate": 9.193170643349511e-06, "loss": 0.9089, "step": 4064 }, { "epoch": 1.1213793103448275, "grad_norm": 3.6806294918060303, "learning_rate": 9.192673648748777e-06, "loss": 0.7696, "step": 4065 }, { "epoch": 1.121655172413793, "grad_norm": 3.8869786262512207, "learning_rate": 9.192176514567338e-06, "loss": 0.7036, "step": 4066 }, { "epoch": 1.1219310344827587, "grad_norm": 4.2052202224731445, "learning_rate": 9.191679240821738e-06, "loss": 0.7677, "step": 4067 }, { "epoch": 1.1222068965517242, "grad_norm": 4.395499229431152, "learning_rate": 9.191181827528537e-06, "loss": 0.9668, "step": 4068 }, { "epoch": 1.1224827586206896, "grad_norm": 3.823000192642212, "learning_rate": 9.190684274704294e-06, "loss": 0.6733, "step": 4069 }, { "epoch": 1.1227586206896552, "grad_norm": 3.6304750442504883, "learning_rate": 9.190186582365572e-06, "loss": 0.7093, "step": 4070 }, { "epoch": 1.1230344827586207, "grad_norm": 3.7758843898773193, "learning_rate": 9.189688750528942e-06, "loss": 0.8911, "step": 4071 }, { "epoch": 1.1233103448275863, "grad_norm": 3.806640386581421, "learning_rate": 9.189190779210974e-06, "loss": 0.8982, "step": 4072 }, { "epoch": 1.1235862068965516, "grad_norm": 4.221133232116699, "learning_rate": 9.18869266842825e-06, "loss": 0.8339, "step": 4073 }, { "epoch": 1.1238620689655172, "grad_norm": 3.632439613342285, "learning_rate": 9.188194418197352e-06, "loss": 0.7506, "step": 4074 }, { "epoch": 1.1241379310344828, "grad_norm": 3.9118082523345947, "learning_rate": 9.187696028534865e-06, "loss": 0.7636, "step": 4075 }, { "epoch": 1.1244137931034484, "grad_norm": 3.8031697273254395, "learning_rate": 9.187197499457383e-06, "loss": 0.7253, "step": 4076 }, { "epoch": 1.1246896551724137, "grad_norm": 3.8259527683258057, "learning_rate": 9.186698830981504e-06, "loss": 0.6941, "step": 4077 }, { "epoch": 1.1249655172413793, "grad_norm": 3.9475162029266357, "learning_rate": 9.186200023123829e-06, "loss": 0.7951, "step": 4078 }, { "epoch": 1.1252413793103448, "grad_norm": 4.321331024169922, "learning_rate": 9.18570107590096e-06, "loss": 0.7995, "step": 4079 }, { "epoch": 1.1255172413793104, "grad_norm": 4.286113262176514, "learning_rate": 9.185201989329515e-06, "loss": 0.8693, "step": 4080 }, { "epoch": 1.1257931034482758, "grad_norm": 4.0546793937683105, "learning_rate": 9.184702763426104e-06, "loss": 0.806, "step": 4081 }, { "epoch": 1.1260689655172413, "grad_norm": 3.972532272338867, "learning_rate": 9.184203398207347e-06, "loss": 0.727, "step": 4082 }, { "epoch": 1.126344827586207, "grad_norm": 3.7805328369140625, "learning_rate": 9.183703893689873e-06, "loss": 0.7344, "step": 4083 }, { "epoch": 1.1266206896551725, "grad_norm": 3.3352208137512207, "learning_rate": 9.183204249890308e-06, "loss": 0.7544, "step": 4084 }, { "epoch": 1.1268965517241378, "grad_norm": 3.7822515964508057, "learning_rate": 9.182704466825285e-06, "loss": 0.7221, "step": 4085 }, { "epoch": 1.1271724137931034, "grad_norm": 3.972386360168457, "learning_rate": 9.182204544511446e-06, "loss": 0.7731, "step": 4086 }, { "epoch": 1.127448275862069, "grad_norm": 4.2125983238220215, "learning_rate": 9.18170448296543e-06, "loss": 0.714, "step": 4087 }, { "epoch": 1.1277241379310345, "grad_norm": 3.9787018299102783, "learning_rate": 9.181204282203889e-06, "loss": 0.6696, "step": 4088 }, { "epoch": 1.1280000000000001, "grad_norm": 3.6161532402038574, "learning_rate": 9.180703942243472e-06, "loss": 0.7664, "step": 4089 }, { "epoch": 1.1282758620689655, "grad_norm": 4.09244966506958, "learning_rate": 9.180203463100838e-06, "loss": 0.736, "step": 4090 }, { "epoch": 1.128551724137931, "grad_norm": 3.87964129447937, "learning_rate": 9.179702844792648e-06, "loss": 0.7345, "step": 4091 }, { "epoch": 1.1288275862068966, "grad_norm": 4.250854015350342, "learning_rate": 9.17920208733557e-06, "loss": 0.783, "step": 4092 }, { "epoch": 1.129103448275862, "grad_norm": 4.436977863311768, "learning_rate": 9.178701190746273e-06, "loss": 0.848, "step": 4093 }, { "epoch": 1.1293793103448275, "grad_norm": 4.241678714752197, "learning_rate": 9.178200155041434e-06, "loss": 0.8827, "step": 4094 }, { "epoch": 1.129655172413793, "grad_norm": 4.332181930541992, "learning_rate": 9.177698980237733e-06, "loss": 0.811, "step": 4095 }, { "epoch": 1.1299310344827587, "grad_norm": 4.191722393035889, "learning_rate": 9.177197666351854e-06, "loss": 0.7166, "step": 4096 }, { "epoch": 1.1302068965517242, "grad_norm": 4.1991658210754395, "learning_rate": 9.176696213400485e-06, "loss": 0.9319, "step": 4097 }, { "epoch": 1.1304827586206896, "grad_norm": 4.0600690841674805, "learning_rate": 9.176194621400325e-06, "loss": 0.9444, "step": 4098 }, { "epoch": 1.1307586206896552, "grad_norm": 4.349521636962891, "learning_rate": 9.175692890368068e-06, "loss": 0.9411, "step": 4099 }, { "epoch": 1.1310344827586207, "grad_norm": 4.057042121887207, "learning_rate": 9.17519102032042e-06, "loss": 0.731, "step": 4100 }, { "epoch": 1.1313103448275863, "grad_norm": 3.4029669761657715, "learning_rate": 9.17468901127409e-06, "loss": 0.6448, "step": 4101 }, { "epoch": 1.1315862068965516, "grad_norm": 3.7614803314208984, "learning_rate": 9.174186863245787e-06, "loss": 0.7487, "step": 4102 }, { "epoch": 1.1318620689655172, "grad_norm": 3.7124762535095215, "learning_rate": 9.173684576252233e-06, "loss": 0.799, "step": 4103 }, { "epoch": 1.1321379310344828, "grad_norm": 4.13900089263916, "learning_rate": 9.173182150310145e-06, "loss": 0.8332, "step": 4104 }, { "epoch": 1.1324137931034484, "grad_norm": 4.135030269622803, "learning_rate": 9.172679585436253e-06, "loss": 0.8376, "step": 4105 }, { "epoch": 1.1326896551724137, "grad_norm": 3.9678120613098145, "learning_rate": 9.172176881647285e-06, "loss": 0.7244, "step": 4106 }, { "epoch": 1.1329655172413793, "grad_norm": 3.660001039505005, "learning_rate": 9.171674038959981e-06, "loss": 0.6782, "step": 4107 }, { "epoch": 1.1332413793103449, "grad_norm": 3.4995219707489014, "learning_rate": 9.17117105739108e-06, "loss": 0.8226, "step": 4108 }, { "epoch": 1.1335172413793104, "grad_norm": 3.9772822856903076, "learning_rate": 9.170667936957324e-06, "loss": 0.8251, "step": 4109 }, { "epoch": 1.1337931034482758, "grad_norm": 3.8161633014678955, "learning_rate": 9.170164677675468e-06, "loss": 0.7477, "step": 4110 }, { "epoch": 1.1340689655172413, "grad_norm": 3.7896227836608887, "learning_rate": 9.169661279562262e-06, "loss": 0.793, "step": 4111 }, { "epoch": 1.134344827586207, "grad_norm": 3.411149501800537, "learning_rate": 9.169157742634465e-06, "loss": 0.8291, "step": 4112 }, { "epoch": 1.1346206896551725, "grad_norm": 3.8797354698181152, "learning_rate": 9.168654066908843e-06, "loss": 0.7975, "step": 4113 }, { "epoch": 1.1348965517241378, "grad_norm": 4.118332386016846, "learning_rate": 9.168150252402164e-06, "loss": 0.8365, "step": 4114 }, { "epoch": 1.1351724137931034, "grad_norm": 4.012099266052246, "learning_rate": 9.167646299131197e-06, "loss": 0.87, "step": 4115 }, { "epoch": 1.135448275862069, "grad_norm": 3.807255983352661, "learning_rate": 9.167142207112724e-06, "loss": 0.7607, "step": 4116 }, { "epoch": 1.1357241379310345, "grad_norm": 3.5528461933135986, "learning_rate": 9.166637976363526e-06, "loss": 0.711, "step": 4117 }, { "epoch": 1.1360000000000001, "grad_norm": 4.462495803833008, "learning_rate": 9.166133606900386e-06, "loss": 0.8572, "step": 4118 }, { "epoch": 1.1362758620689655, "grad_norm": 4.050676345825195, "learning_rate": 9.1656290987401e-06, "loss": 0.8182, "step": 4119 }, { "epoch": 1.136551724137931, "grad_norm": 3.5402626991271973, "learning_rate": 9.16512445189946e-06, "loss": 0.8087, "step": 4120 }, { "epoch": 1.1368275862068966, "grad_norm": 4.499654769897461, "learning_rate": 9.16461966639527e-06, "loss": 0.8558, "step": 4121 }, { "epoch": 1.137103448275862, "grad_norm": 3.5510098934173584, "learning_rate": 9.164114742244334e-06, "loss": 0.718, "step": 4122 }, { "epoch": 1.1373793103448275, "grad_norm": 4.108923435211182, "learning_rate": 9.163609679463462e-06, "loss": 0.9512, "step": 4123 }, { "epoch": 1.137655172413793, "grad_norm": 3.1921143531799316, "learning_rate": 9.163104478069464e-06, "loss": 0.7801, "step": 4124 }, { "epoch": 1.1379310344827587, "grad_norm": 4.377459526062012, "learning_rate": 9.162599138079166e-06, "loss": 0.7537, "step": 4125 }, { "epoch": 1.1382068965517242, "grad_norm": 3.875164270401001, "learning_rate": 9.162093659509386e-06, "loss": 0.7771, "step": 4126 }, { "epoch": 1.1384827586206896, "grad_norm": 4.1872758865356445, "learning_rate": 9.161588042376954e-06, "loss": 0.8522, "step": 4127 }, { "epoch": 1.1387586206896552, "grad_norm": 4.097344398498535, "learning_rate": 9.161082286698705e-06, "loss": 0.7467, "step": 4128 }, { "epoch": 1.1390344827586207, "grad_norm": 3.6406900882720947, "learning_rate": 9.160576392491474e-06, "loss": 0.8751, "step": 4129 }, { "epoch": 1.139310344827586, "grad_norm": 3.900125503540039, "learning_rate": 9.160070359772101e-06, "loss": 0.7066, "step": 4130 }, { "epoch": 1.1395862068965517, "grad_norm": 4.132261753082275, "learning_rate": 9.159564188557438e-06, "loss": 0.8254, "step": 4131 }, { "epoch": 1.1398620689655172, "grad_norm": 3.955362319946289, "learning_rate": 9.159057878864333e-06, "loss": 0.8408, "step": 4132 }, { "epoch": 1.1401379310344828, "grad_norm": 3.983161687850952, "learning_rate": 9.15855143070964e-06, "loss": 0.8073, "step": 4133 }, { "epoch": 1.1404137931034484, "grad_norm": 4.336175441741943, "learning_rate": 9.158044844110223e-06, "loss": 0.7017, "step": 4134 }, { "epoch": 1.1406896551724137, "grad_norm": 4.09613037109375, "learning_rate": 9.157538119082946e-06, "loss": 0.8355, "step": 4135 }, { "epoch": 1.1409655172413793, "grad_norm": 4.083890914916992, "learning_rate": 9.157031255644678e-06, "loss": 0.8438, "step": 4136 }, { "epoch": 1.1412413793103449, "grad_norm": 3.9614076614379883, "learning_rate": 9.156524253812293e-06, "loss": 0.7365, "step": 4137 }, { "epoch": 1.1415172413793104, "grad_norm": 3.640437364578247, "learning_rate": 9.156017113602671e-06, "loss": 0.7808, "step": 4138 }, { "epoch": 1.1417931034482758, "grad_norm": 3.8966917991638184, "learning_rate": 9.155509835032695e-06, "loss": 0.8611, "step": 4139 }, { "epoch": 1.1420689655172414, "grad_norm": 4.02409029006958, "learning_rate": 9.155002418119254e-06, "loss": 0.804, "step": 4140 }, { "epoch": 1.142344827586207, "grad_norm": 3.8212897777557373, "learning_rate": 9.15449486287924e-06, "loss": 0.8621, "step": 4141 }, { "epoch": 1.1426206896551725, "grad_norm": 3.9819791316986084, "learning_rate": 9.153987169329549e-06, "loss": 0.7519, "step": 4142 }, { "epoch": 1.1428965517241378, "grad_norm": 3.965837001800537, "learning_rate": 9.153479337487083e-06, "loss": 0.856, "step": 4143 }, { "epoch": 1.1431724137931034, "grad_norm": 3.6360809803009033, "learning_rate": 9.152971367368751e-06, "loss": 0.7272, "step": 4144 }, { "epoch": 1.143448275862069, "grad_norm": 3.8846988677978516, "learning_rate": 9.152463258991462e-06, "loss": 0.7788, "step": 4145 }, { "epoch": 1.1437241379310346, "grad_norm": 4.168307304382324, "learning_rate": 9.151955012372134e-06, "loss": 0.8584, "step": 4146 }, { "epoch": 1.144, "grad_norm": 4.375912189483643, "learning_rate": 9.151446627527685e-06, "loss": 0.8421, "step": 4147 }, { "epoch": 1.1442758620689655, "grad_norm": 3.831514596939087, "learning_rate": 9.15093810447504e-06, "loss": 0.7659, "step": 4148 }, { "epoch": 1.144551724137931, "grad_norm": 3.6919562816619873, "learning_rate": 9.15042944323113e-06, "loss": 0.7255, "step": 4149 }, { "epoch": 1.1448275862068966, "grad_norm": 4.482250690460205, "learning_rate": 9.149920643812889e-06, "loss": 0.853, "step": 4150 }, { "epoch": 1.145103448275862, "grad_norm": 3.9160470962524414, "learning_rate": 9.149411706237253e-06, "loss": 0.8304, "step": 4151 }, { "epoch": 1.1453793103448275, "grad_norm": 3.5661377906799316, "learning_rate": 9.148902630521169e-06, "loss": 0.7306, "step": 4152 }, { "epoch": 1.145655172413793, "grad_norm": 3.910518169403076, "learning_rate": 9.148393416681583e-06, "loss": 0.8601, "step": 4153 }, { "epoch": 1.1459310344827587, "grad_norm": 3.7434628009796143, "learning_rate": 9.147884064735447e-06, "loss": 0.7106, "step": 4154 }, { "epoch": 1.1462068965517243, "grad_norm": 3.6519484519958496, "learning_rate": 9.14737457469972e-06, "loss": 0.8237, "step": 4155 }, { "epoch": 1.1464827586206896, "grad_norm": 3.8065993785858154, "learning_rate": 9.146864946591362e-06, "loss": 0.7112, "step": 4156 }, { "epoch": 1.1467586206896552, "grad_norm": 3.904703140258789, "learning_rate": 9.146355180427342e-06, "loss": 0.8053, "step": 4157 }, { "epoch": 1.1470344827586207, "grad_norm": 4.181909084320068, "learning_rate": 9.145845276224628e-06, "loss": 0.8105, "step": 4158 }, { "epoch": 1.147310344827586, "grad_norm": 3.8197579383850098, "learning_rate": 9.145335234000195e-06, "loss": 0.8193, "step": 4159 }, { "epoch": 1.1475862068965517, "grad_norm": 3.923466205596924, "learning_rate": 9.144825053771026e-06, "loss": 0.7967, "step": 4160 }, { "epoch": 1.1478620689655172, "grad_norm": 3.950709342956543, "learning_rate": 9.144314735554104e-06, "loss": 0.7221, "step": 4161 }, { "epoch": 1.1481379310344828, "grad_norm": 3.972080707550049, "learning_rate": 9.143804279366418e-06, "loss": 0.8015, "step": 4162 }, { "epoch": 1.1484137931034484, "grad_norm": 4.009434223175049, "learning_rate": 9.143293685224964e-06, "loss": 0.798, "step": 4163 }, { "epoch": 1.1486896551724137, "grad_norm": 4.244087219238281, "learning_rate": 9.142782953146739e-06, "loss": 0.8708, "step": 4164 }, { "epoch": 1.1489655172413793, "grad_norm": 3.844465494155884, "learning_rate": 9.142272083148744e-06, "loss": 0.8484, "step": 4165 }, { "epoch": 1.1492413793103449, "grad_norm": 4.287088394165039, "learning_rate": 9.14176107524799e-06, "loss": 0.8583, "step": 4166 }, { "epoch": 1.1495172413793104, "grad_norm": 4.365010738372803, "learning_rate": 9.141249929461488e-06, "loss": 0.8232, "step": 4167 }, { "epoch": 1.1497931034482758, "grad_norm": 3.92401385307312, "learning_rate": 9.140738645806255e-06, "loss": 0.7649, "step": 4168 }, { "epoch": 1.1500689655172414, "grad_norm": 3.931795835494995, "learning_rate": 9.140227224299312e-06, "loss": 0.9019, "step": 4169 }, { "epoch": 1.150344827586207, "grad_norm": 3.847705841064453, "learning_rate": 9.139715664957685e-06, "loss": 0.7861, "step": 4170 }, { "epoch": 1.1506206896551725, "grad_norm": 3.5235486030578613, "learning_rate": 9.139203967798406e-06, "loss": 0.7035, "step": 4171 }, { "epoch": 1.1508965517241379, "grad_norm": 3.8415591716766357, "learning_rate": 9.138692132838508e-06, "loss": 0.8159, "step": 4172 }, { "epoch": 1.1511724137931034, "grad_norm": 3.6531214714050293, "learning_rate": 9.138180160095033e-06, "loss": 0.7068, "step": 4173 }, { "epoch": 1.151448275862069, "grad_norm": 3.8335354328155518, "learning_rate": 9.137668049585022e-06, "loss": 0.8742, "step": 4174 }, { "epoch": 1.1517241379310346, "grad_norm": 3.87774395942688, "learning_rate": 9.13715580132553e-06, "loss": 0.7898, "step": 4175 }, { "epoch": 1.152, "grad_norm": 4.154683589935303, "learning_rate": 9.136643415333604e-06, "loss": 0.8466, "step": 4176 }, { "epoch": 1.1522758620689655, "grad_norm": 3.6972036361694336, "learning_rate": 9.136130891626305e-06, "loss": 0.787, "step": 4177 }, { "epoch": 1.152551724137931, "grad_norm": 3.8208532333374023, "learning_rate": 9.135618230220696e-06, "loss": 0.7853, "step": 4178 }, { "epoch": 1.1528275862068966, "grad_norm": 4.297207355499268, "learning_rate": 9.135105431133845e-06, "loss": 0.7462, "step": 4179 }, { "epoch": 1.153103448275862, "grad_norm": 3.6917946338653564, "learning_rate": 9.134592494382821e-06, "loss": 0.8607, "step": 4180 }, { "epoch": 1.1533793103448275, "grad_norm": 3.693671941757202, "learning_rate": 9.134079419984704e-06, "loss": 0.8489, "step": 4181 }, { "epoch": 1.1536551724137931, "grad_norm": 3.6148791313171387, "learning_rate": 9.133566207956572e-06, "loss": 0.7515, "step": 4182 }, { "epoch": 1.1539310344827587, "grad_norm": 3.9532923698425293, "learning_rate": 9.133052858315514e-06, "loss": 0.7636, "step": 4183 }, { "epoch": 1.1542068965517243, "grad_norm": 3.3845911026000977, "learning_rate": 9.132539371078617e-06, "loss": 0.6912, "step": 4184 }, { "epoch": 1.1544827586206896, "grad_norm": 3.96462082862854, "learning_rate": 9.132025746262977e-06, "loss": 0.8043, "step": 4185 }, { "epoch": 1.1547586206896552, "grad_norm": 3.746053457260132, "learning_rate": 9.131511983885693e-06, "loss": 0.7626, "step": 4186 }, { "epoch": 1.1550344827586208, "grad_norm": 4.040029525756836, "learning_rate": 9.130998083963871e-06, "loss": 0.8302, "step": 4187 }, { "epoch": 1.155310344827586, "grad_norm": 3.7126123905181885, "learning_rate": 9.130484046514616e-06, "loss": 0.7602, "step": 4188 }, { "epoch": 1.1555862068965517, "grad_norm": 3.98268985748291, "learning_rate": 9.129969871555045e-06, "loss": 0.7543, "step": 4189 }, { "epoch": 1.1558620689655172, "grad_norm": 4.1390461921691895, "learning_rate": 9.129455559102272e-06, "loss": 0.7792, "step": 4190 }, { "epoch": 1.1561379310344828, "grad_norm": 4.022786617279053, "learning_rate": 9.128941109173421e-06, "loss": 0.7726, "step": 4191 }, { "epoch": 1.1564137931034484, "grad_norm": 4.21690559387207, "learning_rate": 9.12842652178562e-06, "loss": 0.8602, "step": 4192 }, { "epoch": 1.1566896551724137, "grad_norm": 4.165976524353027, "learning_rate": 9.127911796955996e-06, "loss": 0.929, "step": 4193 }, { "epoch": 1.1569655172413793, "grad_norm": 4.186295509338379, "learning_rate": 9.127396934701693e-06, "loss": 0.7137, "step": 4194 }, { "epoch": 1.1572413793103449, "grad_norm": 3.9990336894989014, "learning_rate": 9.126881935039844e-06, "loss": 0.8746, "step": 4195 }, { "epoch": 1.1575172413793102, "grad_norm": 3.566013813018799, "learning_rate": 9.126366797987598e-06, "loss": 0.7303, "step": 4196 }, { "epoch": 1.1577931034482758, "grad_norm": 4.373837947845459, "learning_rate": 9.125851523562104e-06, "loss": 0.8248, "step": 4197 }, { "epoch": 1.1580689655172414, "grad_norm": 4.024346351623535, "learning_rate": 9.125336111780515e-06, "loss": 0.8527, "step": 4198 }, { "epoch": 1.158344827586207, "grad_norm": 4.218207836151123, "learning_rate": 9.124820562659991e-06, "loss": 0.8849, "step": 4199 }, { "epoch": 1.1586206896551725, "grad_norm": 4.613929271697998, "learning_rate": 9.124304876217695e-06, "loss": 0.9683, "step": 4200 }, { "epoch": 1.1588965517241379, "grad_norm": 4.567307949066162, "learning_rate": 9.123789052470796e-06, "loss": 0.8769, "step": 4201 }, { "epoch": 1.1591724137931034, "grad_norm": 3.743065595626831, "learning_rate": 9.123273091436467e-06, "loss": 0.797, "step": 4202 }, { "epoch": 1.159448275862069, "grad_norm": 3.7694895267486572, "learning_rate": 9.122756993131883e-06, "loss": 0.7898, "step": 4203 }, { "epoch": 1.1597241379310346, "grad_norm": 4.172540664672852, "learning_rate": 9.122240757574228e-06, "loss": 0.8099, "step": 4204 }, { "epoch": 1.16, "grad_norm": 3.992424964904785, "learning_rate": 9.121724384780686e-06, "loss": 0.8364, "step": 4205 }, { "epoch": 1.1602758620689655, "grad_norm": 4.451821804046631, "learning_rate": 9.121207874768447e-06, "loss": 0.8027, "step": 4206 }, { "epoch": 1.160551724137931, "grad_norm": 4.5269455909729, "learning_rate": 9.120691227554711e-06, "loss": 0.7843, "step": 4207 }, { "epoch": 1.1608275862068966, "grad_norm": 4.037773132324219, "learning_rate": 9.120174443156676e-06, "loss": 0.7966, "step": 4208 }, { "epoch": 1.161103448275862, "grad_norm": 3.9428510665893555, "learning_rate": 9.119657521591543e-06, "loss": 0.9768, "step": 4209 }, { "epoch": 1.1613793103448276, "grad_norm": 4.888501167297363, "learning_rate": 9.119140462876526e-06, "loss": 0.8579, "step": 4210 }, { "epoch": 1.1616551724137931, "grad_norm": 3.8943848609924316, "learning_rate": 9.118623267028836e-06, "loss": 0.8156, "step": 4211 }, { "epoch": 1.1619310344827587, "grad_norm": 3.7565364837646484, "learning_rate": 9.118105934065693e-06, "loss": 0.7403, "step": 4212 }, { "epoch": 1.162206896551724, "grad_norm": 4.270162105560303, "learning_rate": 9.11758846400432e-06, "loss": 0.7651, "step": 4213 }, { "epoch": 1.1624827586206896, "grad_norm": 3.4413650035858154, "learning_rate": 9.11707085686194e-06, "loss": 0.7222, "step": 4214 }, { "epoch": 1.1627586206896552, "grad_norm": 4.1323981285095215, "learning_rate": 9.11655311265579e-06, "loss": 0.7783, "step": 4215 }, { "epoch": 1.1630344827586208, "grad_norm": 4.276498794555664, "learning_rate": 9.116035231403104e-06, "loss": 0.8573, "step": 4216 }, { "epoch": 1.163310344827586, "grad_norm": 3.642765998840332, "learning_rate": 9.115517213121126e-06, "loss": 0.8215, "step": 4217 }, { "epoch": 1.1635862068965517, "grad_norm": 3.8418338298797607, "learning_rate": 9.114999057827097e-06, "loss": 0.8769, "step": 4218 }, { "epoch": 1.1638620689655172, "grad_norm": 4.0684590339660645, "learning_rate": 9.114480765538272e-06, "loss": 0.8544, "step": 4219 }, { "epoch": 1.1641379310344828, "grad_norm": 4.061769485473633, "learning_rate": 9.113962336271903e-06, "loss": 0.7608, "step": 4220 }, { "epoch": 1.1644137931034484, "grad_norm": 3.8488945960998535, "learning_rate": 9.113443770045248e-06, "loss": 0.7575, "step": 4221 }, { "epoch": 1.1646896551724137, "grad_norm": 3.763922929763794, "learning_rate": 9.112925066875573e-06, "loss": 0.8277, "step": 4222 }, { "epoch": 1.1649655172413793, "grad_norm": 4.267034530639648, "learning_rate": 9.112406226780147e-06, "loss": 0.7537, "step": 4223 }, { "epoch": 1.1652413793103449, "grad_norm": 3.705242156982422, "learning_rate": 9.111887249776241e-06, "loss": 0.8017, "step": 4224 }, { "epoch": 1.1655172413793102, "grad_norm": 4.309060573577881, "learning_rate": 9.111368135881135e-06, "loss": 0.7847, "step": 4225 }, { "epoch": 1.1657931034482758, "grad_norm": 4.160934925079346, "learning_rate": 9.110848885112108e-06, "loss": 0.688, "step": 4226 }, { "epoch": 1.1660689655172414, "grad_norm": 4.215296268463135, "learning_rate": 9.11032949748645e-06, "loss": 0.7932, "step": 4227 }, { "epoch": 1.166344827586207, "grad_norm": 4.052431583404541, "learning_rate": 9.109809973021451e-06, "loss": 0.7093, "step": 4228 }, { "epoch": 1.1666206896551725, "grad_norm": 3.771587610244751, "learning_rate": 9.109290311734407e-06, "loss": 0.778, "step": 4229 }, { "epoch": 1.1668965517241379, "grad_norm": 4.0121259689331055, "learning_rate": 9.108770513642615e-06, "loss": 0.7945, "step": 4230 }, { "epoch": 1.1671724137931034, "grad_norm": 4.498929500579834, "learning_rate": 9.108250578763387e-06, "loss": 0.8456, "step": 4231 }, { "epoch": 1.167448275862069, "grad_norm": 4.145181179046631, "learning_rate": 9.107730507114024e-06, "loss": 0.7318, "step": 4232 }, { "epoch": 1.1677241379310346, "grad_norm": 3.3942782878875732, "learning_rate": 9.107210298711846e-06, "loss": 0.8371, "step": 4233 }, { "epoch": 1.168, "grad_norm": 4.078708648681641, "learning_rate": 9.10668995357417e-06, "loss": 0.784, "step": 4234 }, { "epoch": 1.1682758620689655, "grad_norm": 3.550522565841675, "learning_rate": 9.106169471718318e-06, "loss": 0.7642, "step": 4235 }, { "epoch": 1.168551724137931, "grad_norm": 3.8310868740081787, "learning_rate": 9.10564885316162e-06, "loss": 0.8095, "step": 4236 }, { "epoch": 1.1688275862068966, "grad_norm": 4.030385971069336, "learning_rate": 9.105128097921405e-06, "loss": 0.872, "step": 4237 }, { "epoch": 1.169103448275862, "grad_norm": 3.6699399948120117, "learning_rate": 9.104607206015013e-06, "loss": 0.7666, "step": 4238 }, { "epoch": 1.1693793103448276, "grad_norm": 3.9723541736602783, "learning_rate": 9.104086177459782e-06, "loss": 0.7556, "step": 4239 }, { "epoch": 1.1696551724137931, "grad_norm": 4.307277679443359, "learning_rate": 9.103565012273062e-06, "loss": 0.9494, "step": 4240 }, { "epoch": 1.1699310344827587, "grad_norm": 3.5735509395599365, "learning_rate": 9.1030437104722e-06, "loss": 0.5855, "step": 4241 }, { "epoch": 1.170206896551724, "grad_norm": 3.846402645111084, "learning_rate": 9.10252227207455e-06, "loss": 0.8546, "step": 4242 }, { "epoch": 1.1704827586206896, "grad_norm": 3.819929599761963, "learning_rate": 9.102000697097475e-06, "loss": 0.7982, "step": 4243 }, { "epoch": 1.1707586206896552, "grad_norm": 4.25028133392334, "learning_rate": 9.101478985558339e-06, "loss": 0.7211, "step": 4244 }, { "epoch": 1.1710344827586208, "grad_norm": 4.034930229187012, "learning_rate": 9.100957137474507e-06, "loss": 0.7317, "step": 4245 }, { "epoch": 1.1713103448275861, "grad_norm": 3.676645517349243, "learning_rate": 9.100435152863356e-06, "loss": 0.7405, "step": 4246 }, { "epoch": 1.1715862068965517, "grad_norm": 4.050161361694336, "learning_rate": 9.09991303174226e-06, "loss": 0.8314, "step": 4247 }, { "epoch": 1.1718620689655173, "grad_norm": 3.938460350036621, "learning_rate": 9.099390774128605e-06, "loss": 0.8922, "step": 4248 }, { "epoch": 1.1721379310344828, "grad_norm": 3.738223075866699, "learning_rate": 9.098868380039776e-06, "loss": 0.9134, "step": 4249 }, { "epoch": 1.1724137931034484, "grad_norm": 3.876711368560791, "learning_rate": 9.098345849493165e-06, "loss": 0.8492, "step": 4250 }, { "epoch": 1.1726896551724137, "grad_norm": 3.776768207550049, "learning_rate": 9.097823182506165e-06, "loss": 0.7373, "step": 4251 }, { "epoch": 1.1729655172413793, "grad_norm": 4.028566837310791, "learning_rate": 9.09730037909618e-06, "loss": 0.8666, "step": 4252 }, { "epoch": 1.173241379310345, "grad_norm": 4.224350452423096, "learning_rate": 9.096777439280613e-06, "loss": 0.763, "step": 4253 }, { "epoch": 1.1735172413793102, "grad_norm": 3.9856016635894775, "learning_rate": 9.096254363076875e-06, "loss": 0.9456, "step": 4254 }, { "epoch": 1.1737931034482758, "grad_norm": 4.131013870239258, "learning_rate": 9.095731150502377e-06, "loss": 0.7707, "step": 4255 }, { "epoch": 1.1740689655172414, "grad_norm": 3.937079668045044, "learning_rate": 9.095207801574542e-06, "loss": 0.7958, "step": 4256 }, { "epoch": 1.174344827586207, "grad_norm": 3.594508409500122, "learning_rate": 9.09468431631079e-06, "loss": 0.798, "step": 4257 }, { "epoch": 1.1746206896551725, "grad_norm": 3.599576711654663, "learning_rate": 9.094160694728548e-06, "loss": 0.7838, "step": 4258 }, { "epoch": 1.1748965517241379, "grad_norm": 3.3909831047058105, "learning_rate": 9.093636936845252e-06, "loss": 0.6942, "step": 4259 }, { "epoch": 1.1751724137931034, "grad_norm": 3.916792154312134, "learning_rate": 9.093113042678336e-06, "loss": 0.823, "step": 4260 }, { "epoch": 1.175448275862069, "grad_norm": 4.092735767364502, "learning_rate": 9.09258901224524e-06, "loss": 0.7451, "step": 4261 }, { "epoch": 1.1757241379310344, "grad_norm": 3.924683094024658, "learning_rate": 9.092064845563413e-06, "loss": 0.8859, "step": 4262 }, { "epoch": 1.176, "grad_norm": 6.722187519073486, "learning_rate": 9.091540542650303e-06, "loss": 0.9586, "step": 4263 }, { "epoch": 1.1762758620689655, "grad_norm": 3.6218321323394775, "learning_rate": 9.091016103523366e-06, "loss": 0.7807, "step": 4264 }, { "epoch": 1.176551724137931, "grad_norm": 3.6636359691619873, "learning_rate": 9.09049152820006e-06, "loss": 0.7593, "step": 4265 }, { "epoch": 1.1768275862068966, "grad_norm": 4.0987982749938965, "learning_rate": 9.089966816697851e-06, "loss": 0.7855, "step": 4266 }, { "epoch": 1.177103448275862, "grad_norm": 3.927222967147827, "learning_rate": 9.089441969034205e-06, "loss": 0.8276, "step": 4267 }, { "epoch": 1.1773793103448276, "grad_norm": 4.112236022949219, "learning_rate": 9.088916985226599e-06, "loss": 0.8319, "step": 4268 }, { "epoch": 1.1776551724137931, "grad_norm": 3.9969780445098877, "learning_rate": 9.088391865292506e-06, "loss": 0.7725, "step": 4269 }, { "epoch": 1.1779310344827587, "grad_norm": 3.919208526611328, "learning_rate": 9.08786660924941e-06, "loss": 0.6603, "step": 4270 }, { "epoch": 1.178206896551724, "grad_norm": 3.727339267730713, "learning_rate": 9.087341217114798e-06, "loss": 0.8162, "step": 4271 }, { "epoch": 1.1784827586206896, "grad_norm": 4.15130090713501, "learning_rate": 9.08681568890616e-06, "loss": 0.8296, "step": 4272 }, { "epoch": 1.1787586206896552, "grad_norm": 3.757544994354248, "learning_rate": 9.086290024640994e-06, "loss": 0.7837, "step": 4273 }, { "epoch": 1.1790344827586208, "grad_norm": 4.122561931610107, "learning_rate": 9.085764224336797e-06, "loss": 0.8582, "step": 4274 }, { "epoch": 1.1793103448275861, "grad_norm": 4.392375946044922, "learning_rate": 9.085238288011076e-06, "loss": 0.845, "step": 4275 }, { "epoch": 1.1795862068965517, "grad_norm": 3.9503486156463623, "learning_rate": 9.08471221568134e-06, "loss": 0.7619, "step": 4276 }, { "epoch": 1.1798620689655173, "grad_norm": 3.6698267459869385, "learning_rate": 9.0841860073651e-06, "loss": 0.8117, "step": 4277 }, { "epoch": 1.1801379310344828, "grad_norm": 4.202841281890869, "learning_rate": 9.08365966307988e-06, "loss": 0.8422, "step": 4278 }, { "epoch": 1.1804137931034482, "grad_norm": 3.9556310176849365, "learning_rate": 9.083133182843198e-06, "loss": 0.8746, "step": 4279 }, { "epoch": 1.1806896551724138, "grad_norm": 3.803548812866211, "learning_rate": 9.082606566672584e-06, "loss": 0.8593, "step": 4280 }, { "epoch": 1.1809655172413793, "grad_norm": 3.8975043296813965, "learning_rate": 9.082079814585566e-06, "loss": 0.7593, "step": 4281 }, { "epoch": 1.181241379310345, "grad_norm": 3.7027575969696045, "learning_rate": 9.081552926599687e-06, "loss": 0.9463, "step": 4282 }, { "epoch": 1.1815172413793102, "grad_norm": 4.073347568511963, "learning_rate": 9.08102590273248e-06, "loss": 0.7719, "step": 4283 }, { "epoch": 1.1817931034482758, "grad_norm": 3.587653160095215, "learning_rate": 9.080498743001497e-06, "loss": 0.7582, "step": 4284 }, { "epoch": 1.1820689655172414, "grad_norm": 4.0825276374816895, "learning_rate": 9.079971447424285e-06, "loss": 0.8602, "step": 4285 }, { "epoch": 1.182344827586207, "grad_norm": 4.275148391723633, "learning_rate": 9.0794440160184e-06, "loss": 0.8656, "step": 4286 }, { "epoch": 1.1826206896551725, "grad_norm": 3.9292118549346924, "learning_rate": 9.078916448801397e-06, "loss": 0.9052, "step": 4287 }, { "epoch": 1.1828965517241379, "grad_norm": 3.773826837539673, "learning_rate": 9.078388745790846e-06, "loss": 0.8216, "step": 4288 }, { "epoch": 1.1831724137931035, "grad_norm": 3.7576916217803955, "learning_rate": 9.07786090700431e-06, "loss": 0.7569, "step": 4289 }, { "epoch": 1.183448275862069, "grad_norm": 3.831127405166626, "learning_rate": 9.077332932459364e-06, "loss": 0.865, "step": 4290 }, { "epoch": 1.1837241379310344, "grad_norm": 3.8848793506622314, "learning_rate": 9.076804822173582e-06, "loss": 0.8299, "step": 4291 }, { "epoch": 1.184, "grad_norm": 3.907679319381714, "learning_rate": 9.07627657616455e-06, "loss": 0.8797, "step": 4292 }, { "epoch": 1.1842758620689655, "grad_norm": 4.1982340812683105, "learning_rate": 9.075748194449852e-06, "loss": 0.9842, "step": 4293 }, { "epoch": 1.184551724137931, "grad_norm": 3.8279242515563965, "learning_rate": 9.075219677047078e-06, "loss": 0.8114, "step": 4294 }, { "epoch": 1.1848275862068967, "grad_norm": 4.068211555480957, "learning_rate": 9.074691023973824e-06, "loss": 0.9003, "step": 4295 }, { "epoch": 1.185103448275862, "grad_norm": 3.951972484588623, "learning_rate": 9.07416223524769e-06, "loss": 0.7749, "step": 4296 }, { "epoch": 1.1853793103448276, "grad_norm": 4.076466083526611, "learning_rate": 9.07363331088628e-06, "loss": 0.82, "step": 4297 }, { "epoch": 1.1856551724137931, "grad_norm": 4.123375415802002, "learning_rate": 9.073104250907202e-06, "loss": 0.6643, "step": 4298 }, { "epoch": 1.1859310344827587, "grad_norm": 3.86586594581604, "learning_rate": 9.07257505532807e-06, "loss": 0.8719, "step": 4299 }, { "epoch": 1.186206896551724, "grad_norm": 4.24118709564209, "learning_rate": 9.072045724166502e-06, "loss": 0.8358, "step": 4300 }, { "epoch": 1.1864827586206896, "grad_norm": 4.011579990386963, "learning_rate": 9.071516257440118e-06, "loss": 0.8536, "step": 4301 }, { "epoch": 1.1867586206896552, "grad_norm": 4.302643775939941, "learning_rate": 9.070986655166547e-06, "loss": 0.947, "step": 4302 }, { "epoch": 1.1870344827586208, "grad_norm": 3.5314018726348877, "learning_rate": 9.070456917363421e-06, "loss": 0.7711, "step": 4303 }, { "epoch": 1.1873103448275861, "grad_norm": 3.8377292156219482, "learning_rate": 9.069927044048376e-06, "loss": 0.8595, "step": 4304 }, { "epoch": 1.1875862068965517, "grad_norm": 3.7030742168426514, "learning_rate": 9.069397035239049e-06, "loss": 0.6885, "step": 4305 }, { "epoch": 1.1878620689655173, "grad_norm": 3.8591411113739014, "learning_rate": 9.068866890953087e-06, "loss": 0.6955, "step": 4306 }, { "epoch": 1.1881379310344828, "grad_norm": 4.138055324554443, "learning_rate": 9.068336611208139e-06, "loss": 0.8333, "step": 4307 }, { "epoch": 1.1884137931034482, "grad_norm": 3.8888633251190186, "learning_rate": 9.06780619602186e-06, "loss": 0.7387, "step": 4308 }, { "epoch": 1.1886896551724138, "grad_norm": 4.253654956817627, "learning_rate": 9.067275645411906e-06, "loss": 0.854, "step": 4309 }, { "epoch": 1.1889655172413793, "grad_norm": 4.249771595001221, "learning_rate": 9.066744959395943e-06, "loss": 0.7125, "step": 4310 }, { "epoch": 1.189241379310345, "grad_norm": 4.213516712188721, "learning_rate": 9.066214137991636e-06, "loss": 0.7487, "step": 4311 }, { "epoch": 1.1895172413793103, "grad_norm": 4.649819850921631, "learning_rate": 9.065683181216657e-06, "loss": 0.8671, "step": 4312 }, { "epoch": 1.1897931034482758, "grad_norm": 3.955559015274048, "learning_rate": 9.065152089088682e-06, "loss": 0.7812, "step": 4313 }, { "epoch": 1.1900689655172414, "grad_norm": 4.171421527862549, "learning_rate": 9.064620861625395e-06, "loss": 0.9479, "step": 4314 }, { "epoch": 1.190344827586207, "grad_norm": 4.100063323974609, "learning_rate": 9.064089498844477e-06, "loss": 0.8629, "step": 4315 }, { "epoch": 1.1906206896551725, "grad_norm": 3.9840915203094482, "learning_rate": 9.063558000763621e-06, "loss": 0.7937, "step": 4316 }, { "epoch": 1.1908965517241379, "grad_norm": 4.241152763366699, "learning_rate": 9.063026367400521e-06, "loss": 0.8928, "step": 4317 }, { "epoch": 1.1911724137931035, "grad_norm": 3.7266857624053955, "learning_rate": 9.062494598772875e-06, "loss": 0.664, "step": 4318 }, { "epoch": 1.191448275862069, "grad_norm": 3.902013063430786, "learning_rate": 9.061962694898386e-06, "loss": 0.768, "step": 4319 }, { "epoch": 1.1917241379310344, "grad_norm": 3.958580732345581, "learning_rate": 9.061430655794763e-06, "loss": 0.7456, "step": 4320 }, { "epoch": 1.192, "grad_norm": 4.117892742156982, "learning_rate": 9.060898481479719e-06, "loss": 0.7423, "step": 4321 }, { "epoch": 1.1922758620689655, "grad_norm": 3.7485692501068115, "learning_rate": 9.06036617197097e-06, "loss": 0.7555, "step": 4322 }, { "epoch": 1.192551724137931, "grad_norm": 3.85288143157959, "learning_rate": 9.059833727286237e-06, "loss": 0.825, "step": 4323 }, { "epoch": 1.1928275862068967, "grad_norm": 3.8228020668029785, "learning_rate": 9.059301147443246e-06, "loss": 0.8361, "step": 4324 }, { "epoch": 1.193103448275862, "grad_norm": 4.235054969787598, "learning_rate": 9.058768432459728e-06, "loss": 0.7737, "step": 4325 }, { "epoch": 1.1933793103448276, "grad_norm": 3.5203700065612793, "learning_rate": 9.058235582353418e-06, "loss": 0.7397, "step": 4326 }, { "epoch": 1.1936551724137932, "grad_norm": 4.24254846572876, "learning_rate": 9.057702597142055e-06, "loss": 0.8525, "step": 4327 }, { "epoch": 1.1939310344827585, "grad_norm": 3.320669651031494, "learning_rate": 9.057169476843383e-06, "loss": 0.7038, "step": 4328 }, { "epoch": 1.194206896551724, "grad_norm": 4.062090873718262, "learning_rate": 9.05663622147515e-06, "loss": 0.8307, "step": 4329 }, { "epoch": 1.1944827586206896, "grad_norm": 3.6502888202667236, "learning_rate": 9.05610283105511e-06, "loss": 0.7169, "step": 4330 }, { "epoch": 1.1947586206896552, "grad_norm": 4.370120525360107, "learning_rate": 9.055569305601022e-06, "loss": 0.8056, "step": 4331 }, { "epoch": 1.1950344827586208, "grad_norm": 4.395997524261475, "learning_rate": 9.055035645130643e-06, "loss": 0.8328, "step": 4332 }, { "epoch": 1.1953103448275861, "grad_norm": 3.9445040225982666, "learning_rate": 9.054501849661743e-06, "loss": 0.7125, "step": 4333 }, { "epoch": 1.1955862068965517, "grad_norm": 4.458618640899658, "learning_rate": 9.053967919212092e-06, "loss": 0.7997, "step": 4334 }, { "epoch": 1.1958620689655173, "grad_norm": 4.0544538497924805, "learning_rate": 9.053433853799466e-06, "loss": 0.8597, "step": 4335 }, { "epoch": 1.1961379310344828, "grad_norm": 4.103519439697266, "learning_rate": 9.052899653441643e-06, "loss": 0.7744, "step": 4336 }, { "epoch": 1.1964137931034482, "grad_norm": 4.3286824226379395, "learning_rate": 9.05236531815641e-06, "loss": 0.9138, "step": 4337 }, { "epoch": 1.1966896551724138, "grad_norm": 4.144522190093994, "learning_rate": 9.051830847961552e-06, "loss": 0.8421, "step": 4338 }, { "epoch": 1.1969655172413793, "grad_norm": 4.203409671783447, "learning_rate": 9.051296242874868e-06, "loss": 0.8435, "step": 4339 }, { "epoch": 1.197241379310345, "grad_norm": 4.181499481201172, "learning_rate": 9.05076150291415e-06, "loss": 0.8667, "step": 4340 }, { "epoch": 1.1975172413793103, "grad_norm": 3.907404899597168, "learning_rate": 9.050226628097205e-06, "loss": 0.747, "step": 4341 }, { "epoch": 1.1977931034482758, "grad_norm": 4.029376983642578, "learning_rate": 9.049691618441836e-06, "loss": 0.8649, "step": 4342 }, { "epoch": 1.1980689655172414, "grad_norm": 4.0539870262146, "learning_rate": 9.049156473965859e-06, "loss": 0.7387, "step": 4343 }, { "epoch": 1.198344827586207, "grad_norm": 3.907081127166748, "learning_rate": 9.048621194687083e-06, "loss": 0.8505, "step": 4344 }, { "epoch": 1.1986206896551723, "grad_norm": 4.227381229400635, "learning_rate": 9.048085780623336e-06, "loss": 0.8871, "step": 4345 }, { "epoch": 1.198896551724138, "grad_norm": 3.9908413887023926, "learning_rate": 9.047550231792437e-06, "loss": 0.7956, "step": 4346 }, { "epoch": 1.1991724137931035, "grad_norm": 3.7502286434173584, "learning_rate": 9.047014548212219e-06, "loss": 0.8177, "step": 4347 }, { "epoch": 1.199448275862069, "grad_norm": 3.7174811363220215, "learning_rate": 9.046478729900512e-06, "loss": 0.8155, "step": 4348 }, { "epoch": 1.1997241379310344, "grad_norm": 4.062794208526611, "learning_rate": 9.045942776875159e-06, "loss": 0.8836, "step": 4349 }, { "epoch": 1.2, "grad_norm": 3.834033489227295, "learning_rate": 9.045406689153996e-06, "loss": 0.7447, "step": 4350 }, { "epoch": 1.2002758620689655, "grad_norm": 3.8064303398132324, "learning_rate": 9.044870466754878e-06, "loss": 0.7622, "step": 4351 }, { "epoch": 1.200551724137931, "grad_norm": 3.8532707691192627, "learning_rate": 9.044334109695651e-06, "loss": 0.8149, "step": 4352 }, { "epoch": 1.2008275862068967, "grad_norm": 3.7969746589660645, "learning_rate": 9.043797617994173e-06, "loss": 0.7089, "step": 4353 }, { "epoch": 1.201103448275862, "grad_norm": 3.8946645259857178, "learning_rate": 9.043260991668306e-06, "loss": 0.7191, "step": 4354 }, { "epoch": 1.2013793103448276, "grad_norm": 3.6556053161621094, "learning_rate": 9.042724230735913e-06, "loss": 0.7664, "step": 4355 }, { "epoch": 1.2016551724137932, "grad_norm": 3.5350990295410156, "learning_rate": 9.042187335214865e-06, "loss": 0.7216, "step": 4356 }, { "epoch": 1.2019310344827585, "grad_norm": 4.051614284515381, "learning_rate": 9.041650305123035e-06, "loss": 0.9153, "step": 4357 }, { "epoch": 1.202206896551724, "grad_norm": 4.236313819885254, "learning_rate": 9.041113140478301e-06, "loss": 0.8248, "step": 4358 }, { "epoch": 1.2024827586206897, "grad_norm": 3.960017681121826, "learning_rate": 9.040575841298549e-06, "loss": 0.7681, "step": 4359 }, { "epoch": 1.2027586206896552, "grad_norm": 3.725022792816162, "learning_rate": 9.040038407601665e-06, "loss": 0.7804, "step": 4360 }, { "epoch": 1.2030344827586208, "grad_norm": 4.059642791748047, "learning_rate": 9.03950083940554e-06, "loss": 0.8832, "step": 4361 }, { "epoch": 1.2033103448275861, "grad_norm": 3.874380111694336, "learning_rate": 9.038963136728072e-06, "loss": 0.6583, "step": 4362 }, { "epoch": 1.2035862068965517, "grad_norm": 4.20599889755249, "learning_rate": 9.038425299587159e-06, "loss": 0.843, "step": 4363 }, { "epoch": 1.2038620689655173, "grad_norm": 4.177585124969482, "learning_rate": 9.03788732800071e-06, "loss": 0.9157, "step": 4364 }, { "epoch": 1.2041379310344829, "grad_norm": 4.158649444580078, "learning_rate": 9.037349221986635e-06, "loss": 0.8448, "step": 4365 }, { "epoch": 1.2044137931034482, "grad_norm": 4.448556423187256, "learning_rate": 9.036810981562845e-06, "loss": 0.9087, "step": 4366 }, { "epoch": 1.2046896551724138, "grad_norm": 4.128057479858398, "learning_rate": 9.036272606747263e-06, "loss": 0.8115, "step": 4367 }, { "epoch": 1.2049655172413793, "grad_norm": 3.742710590362549, "learning_rate": 9.035734097557808e-06, "loss": 0.7476, "step": 4368 }, { "epoch": 1.205241379310345, "grad_norm": 4.170004844665527, "learning_rate": 9.035195454012412e-06, "loss": 0.8385, "step": 4369 }, { "epoch": 1.2055172413793103, "grad_norm": 4.108280658721924, "learning_rate": 9.034656676129003e-06, "loss": 0.8751, "step": 4370 }, { "epoch": 1.2057931034482758, "grad_norm": 4.354494094848633, "learning_rate": 9.03411776392552e-06, "loss": 0.8877, "step": 4371 }, { "epoch": 1.2060689655172414, "grad_norm": 3.7797868251800537, "learning_rate": 9.033578717419906e-06, "loss": 0.7874, "step": 4372 }, { "epoch": 1.206344827586207, "grad_norm": 3.997070789337158, "learning_rate": 9.033039536630105e-06, "loss": 0.7403, "step": 4373 }, { "epoch": 1.2066206896551723, "grad_norm": 3.985250949859619, "learning_rate": 9.032500221574068e-06, "loss": 0.8263, "step": 4374 }, { "epoch": 1.206896551724138, "grad_norm": 3.867225170135498, "learning_rate": 9.031960772269748e-06, "loss": 0.7716, "step": 4375 }, { "epoch": 1.2071724137931035, "grad_norm": 3.7809739112854004, "learning_rate": 9.031421188735104e-06, "loss": 0.6958, "step": 4376 }, { "epoch": 1.207448275862069, "grad_norm": 4.178616046905518, "learning_rate": 9.030881470988101e-06, "loss": 0.9476, "step": 4377 }, { "epoch": 1.2077241379310344, "grad_norm": 3.914594888687134, "learning_rate": 9.030341619046707e-06, "loss": 0.8055, "step": 4378 }, { "epoch": 1.208, "grad_norm": 3.82774019241333, "learning_rate": 9.029801632928894e-06, "loss": 0.6736, "step": 4379 }, { "epoch": 1.2082758620689655, "grad_norm": 3.592251777648926, "learning_rate": 9.029261512652639e-06, "loss": 0.7944, "step": 4380 }, { "epoch": 1.208551724137931, "grad_norm": 3.830724000930786, "learning_rate": 9.028721258235924e-06, "loss": 0.7407, "step": 4381 }, { "epoch": 1.2088275862068967, "grad_norm": 3.952695369720459, "learning_rate": 9.028180869696735e-06, "loss": 0.791, "step": 4382 }, { "epoch": 1.209103448275862, "grad_norm": 3.9787352085113525, "learning_rate": 9.027640347053059e-06, "loss": 0.967, "step": 4383 }, { "epoch": 1.2093793103448276, "grad_norm": 3.8230371475219727, "learning_rate": 9.027099690322896e-06, "loss": 0.8164, "step": 4384 }, { "epoch": 1.2096551724137932, "grad_norm": 3.6714298725128174, "learning_rate": 9.026558899524242e-06, "loss": 0.7954, "step": 4385 }, { "epoch": 1.2099310344827585, "grad_norm": 3.855890989303589, "learning_rate": 9.026017974675105e-06, "loss": 0.8349, "step": 4386 }, { "epoch": 1.210206896551724, "grad_norm": 3.76678204536438, "learning_rate": 9.025476915793487e-06, "loss": 0.7501, "step": 4387 }, { "epoch": 1.2104827586206897, "grad_norm": 3.4807116985321045, "learning_rate": 9.024935722897404e-06, "loss": 0.8044, "step": 4388 }, { "epoch": 1.2107586206896552, "grad_norm": 3.815399169921875, "learning_rate": 9.024394396004874e-06, "loss": 0.8074, "step": 4389 }, { "epoch": 1.2110344827586208, "grad_norm": 4.043045997619629, "learning_rate": 9.023852935133917e-06, "loss": 0.8381, "step": 4390 }, { "epoch": 1.2113103448275861, "grad_norm": 3.872682809829712, "learning_rate": 9.023311340302558e-06, "loss": 0.8854, "step": 4391 }, { "epoch": 1.2115862068965517, "grad_norm": 4.349404811859131, "learning_rate": 9.022769611528833e-06, "loss": 0.9288, "step": 4392 }, { "epoch": 1.2118620689655173, "grad_norm": 3.657633066177368, "learning_rate": 9.02222774883077e-06, "loss": 0.7585, "step": 4393 }, { "epoch": 1.2121379310344826, "grad_norm": 3.3236348628997803, "learning_rate": 9.021685752226412e-06, "loss": 0.8072, "step": 4394 }, { "epoch": 1.2124137931034482, "grad_norm": 3.7509655952453613, "learning_rate": 9.021143621733803e-06, "loss": 0.7721, "step": 4395 }, { "epoch": 1.2126896551724138, "grad_norm": 3.8978431224823, "learning_rate": 9.02060135737099e-06, "loss": 0.8294, "step": 4396 }, { "epoch": 1.2129655172413794, "grad_norm": 3.748748302459717, "learning_rate": 9.020058959156029e-06, "loss": 0.7619, "step": 4397 }, { "epoch": 1.213241379310345, "grad_norm": 4.016090393066406, "learning_rate": 9.019516427106973e-06, "loss": 0.8806, "step": 4398 }, { "epoch": 1.2135172413793103, "grad_norm": 4.465331554412842, "learning_rate": 9.018973761241887e-06, "loss": 0.8137, "step": 4399 }, { "epoch": 1.2137931034482758, "grad_norm": 4.039361000061035, "learning_rate": 9.018430961578838e-06, "loss": 0.7982, "step": 4400 }, { "epoch": 1.2140689655172414, "grad_norm": 3.954237222671509, "learning_rate": 9.017888028135891e-06, "loss": 0.8252, "step": 4401 }, { "epoch": 1.214344827586207, "grad_norm": 3.8153586387634277, "learning_rate": 9.017344960931126e-06, "loss": 0.7269, "step": 4402 }, { "epoch": 1.2146206896551723, "grad_norm": 4.136900901794434, "learning_rate": 9.01680175998262e-06, "loss": 0.8419, "step": 4403 }, { "epoch": 1.214896551724138, "grad_norm": 4.10459566116333, "learning_rate": 9.016258425308462e-06, "loss": 0.7862, "step": 4404 }, { "epoch": 1.2151724137931035, "grad_norm": 3.9049577713012695, "learning_rate": 9.015714956926733e-06, "loss": 0.9078, "step": 4405 }, { "epoch": 1.215448275862069, "grad_norm": 4.333651542663574, "learning_rate": 9.015171354855533e-06, "loss": 0.8306, "step": 4406 }, { "epoch": 1.2157241379310344, "grad_norm": 3.790846347808838, "learning_rate": 9.014627619112953e-06, "loss": 0.7277, "step": 4407 }, { "epoch": 1.216, "grad_norm": 3.6978609561920166, "learning_rate": 9.014083749717099e-06, "loss": 0.7146, "step": 4408 }, { "epoch": 1.2162758620689655, "grad_norm": 3.9210145473480225, "learning_rate": 9.013539746686078e-06, "loss": 0.9058, "step": 4409 }, { "epoch": 1.2165517241379311, "grad_norm": 3.7286882400512695, "learning_rate": 9.012995610037999e-06, "loss": 0.7471, "step": 4410 }, { "epoch": 1.2168275862068965, "grad_norm": 3.9336233139038086, "learning_rate": 9.012451339790975e-06, "loss": 0.8159, "step": 4411 }, { "epoch": 1.217103448275862, "grad_norm": 3.498065710067749, "learning_rate": 9.011906935963127e-06, "loss": 0.6909, "step": 4412 }, { "epoch": 1.2173793103448276, "grad_norm": 4.4491167068481445, "learning_rate": 9.01136239857258e-06, "loss": 0.8586, "step": 4413 }, { "epoch": 1.2176551724137932, "grad_norm": 3.9711475372314453, "learning_rate": 9.010817727637465e-06, "loss": 0.7749, "step": 4414 }, { "epoch": 1.2179310344827585, "grad_norm": 4.01042366027832, "learning_rate": 9.01027292317591e-06, "loss": 0.8074, "step": 4415 }, { "epoch": 1.218206896551724, "grad_norm": 3.946235179901123, "learning_rate": 9.009727985206056e-06, "loss": 0.7693, "step": 4416 }, { "epoch": 1.2184827586206897, "grad_norm": 4.027617454528809, "learning_rate": 9.009182913746042e-06, "loss": 0.7115, "step": 4417 }, { "epoch": 1.2187586206896552, "grad_norm": 3.860868453979492, "learning_rate": 9.008637708814016e-06, "loss": 0.7917, "step": 4418 }, { "epoch": 1.2190344827586208, "grad_norm": 4.074306488037109, "learning_rate": 9.00809237042813e-06, "loss": 0.8808, "step": 4419 }, { "epoch": 1.2193103448275862, "grad_norm": 3.884547710418701, "learning_rate": 9.007546898606536e-06, "loss": 0.7963, "step": 4420 }, { "epoch": 1.2195862068965517, "grad_norm": 4.167460918426514, "learning_rate": 9.007001293367396e-06, "loss": 0.8824, "step": 4421 }, { "epoch": 1.2198620689655173, "grad_norm": 3.9919419288635254, "learning_rate": 9.006455554728873e-06, "loss": 0.7381, "step": 4422 }, { "epoch": 1.2201379310344826, "grad_norm": 3.8948702812194824, "learning_rate": 9.005909682709136e-06, "loss": 0.8977, "step": 4423 }, { "epoch": 1.2204137931034482, "grad_norm": 3.9914543628692627, "learning_rate": 9.005363677326358e-06, "loss": 0.8918, "step": 4424 }, { "epoch": 1.2206896551724138, "grad_norm": 3.824204444885254, "learning_rate": 9.004817538598716e-06, "loss": 0.8526, "step": 4425 }, { "epoch": 1.2209655172413794, "grad_norm": 4.089150905609131, "learning_rate": 9.00427126654439e-06, "loss": 0.9142, "step": 4426 }, { "epoch": 1.221241379310345, "grad_norm": 4.459463596343994, "learning_rate": 9.003724861181571e-06, "loss": 0.8176, "step": 4427 }, { "epoch": 1.2215172413793103, "grad_norm": 4.578126430511475, "learning_rate": 9.003178322528446e-06, "loss": 0.919, "step": 4428 }, { "epoch": 1.2217931034482759, "grad_norm": 3.8094141483306885, "learning_rate": 9.00263165060321e-06, "loss": 0.8313, "step": 4429 }, { "epoch": 1.2220689655172414, "grad_norm": 3.95444393157959, "learning_rate": 9.002084845424067e-06, "loss": 0.7602, "step": 4430 }, { "epoch": 1.2223448275862068, "grad_norm": 3.4976117610931396, "learning_rate": 9.001537907009214e-06, "loss": 0.7035, "step": 4431 }, { "epoch": 1.2226206896551723, "grad_norm": 3.6361520290374756, "learning_rate": 9.000990835376864e-06, "loss": 0.8159, "step": 4432 }, { "epoch": 1.222896551724138, "grad_norm": 3.883434772491455, "learning_rate": 9.00044363054523e-06, "loss": 0.813, "step": 4433 }, { "epoch": 1.2231724137931035, "grad_norm": 4.091454982757568, "learning_rate": 8.999896292532526e-06, "loss": 0.8118, "step": 4434 }, { "epoch": 1.223448275862069, "grad_norm": 3.868630886077881, "learning_rate": 8.999348821356978e-06, "loss": 0.8495, "step": 4435 }, { "epoch": 1.2237241379310344, "grad_norm": 3.9202802181243896, "learning_rate": 8.99880121703681e-06, "loss": 0.8497, "step": 4436 }, { "epoch": 1.224, "grad_norm": 4.0152201652526855, "learning_rate": 8.998253479590251e-06, "loss": 0.9472, "step": 4437 }, { "epoch": 1.2242758620689655, "grad_norm": 4.203105926513672, "learning_rate": 8.99770560903554e-06, "loss": 0.8298, "step": 4438 }, { "epoch": 1.2245517241379311, "grad_norm": 3.783916711807251, "learning_rate": 8.997157605390914e-06, "loss": 0.7037, "step": 4439 }, { "epoch": 1.2248275862068965, "grad_norm": 4.04276180267334, "learning_rate": 8.996609468674616e-06, "loss": 0.8051, "step": 4440 }, { "epoch": 1.225103448275862, "grad_norm": 4.162311553955078, "learning_rate": 8.996061198904897e-06, "loss": 0.8565, "step": 4441 }, { "epoch": 1.2253793103448276, "grad_norm": 4.152978420257568, "learning_rate": 8.995512796100007e-06, "loss": 0.8058, "step": 4442 }, { "epoch": 1.2256551724137932, "grad_norm": 4.042601108551025, "learning_rate": 8.994964260278204e-06, "loss": 0.8394, "step": 4443 }, { "epoch": 1.2259310344827585, "grad_norm": 4.78580379486084, "learning_rate": 8.994415591457753e-06, "loss": 0.8068, "step": 4444 }, { "epoch": 1.226206896551724, "grad_norm": 3.5093023777008057, "learning_rate": 8.993866789656913e-06, "loss": 0.7358, "step": 4445 }, { "epoch": 1.2264827586206897, "grad_norm": 4.209536075592041, "learning_rate": 8.99331785489396e-06, "loss": 0.9555, "step": 4446 }, { "epoch": 1.2267586206896552, "grad_norm": 4.443370819091797, "learning_rate": 8.99276878718717e-06, "loss": 0.907, "step": 4447 }, { "epoch": 1.2270344827586206, "grad_norm": 3.970893383026123, "learning_rate": 8.992219586554817e-06, "loss": 0.7857, "step": 4448 }, { "epoch": 1.2273103448275862, "grad_norm": 3.4672958850860596, "learning_rate": 8.99167025301519e-06, "loss": 0.7767, "step": 4449 }, { "epoch": 1.2275862068965517, "grad_norm": 4.284389495849609, "learning_rate": 8.991120786586573e-06, "loss": 0.816, "step": 4450 }, { "epoch": 1.2278620689655173, "grad_norm": 4.0126166343688965, "learning_rate": 8.990571187287262e-06, "loss": 0.9002, "step": 4451 }, { "epoch": 1.2281379310344827, "grad_norm": 3.8707048892974854, "learning_rate": 8.99002145513555e-06, "loss": 0.9386, "step": 4452 }, { "epoch": 1.2284137931034482, "grad_norm": 3.7948360443115234, "learning_rate": 8.989471590149745e-06, "loss": 0.8197, "step": 4453 }, { "epoch": 1.2286896551724138, "grad_norm": 4.304454803466797, "learning_rate": 8.988921592348145e-06, "loss": 0.8708, "step": 4454 }, { "epoch": 1.2289655172413794, "grad_norm": 4.087325572967529, "learning_rate": 8.988371461749065e-06, "loss": 0.7689, "step": 4455 }, { "epoch": 1.229241379310345, "grad_norm": 4.362293243408203, "learning_rate": 8.987821198370818e-06, "loss": 0.8641, "step": 4456 }, { "epoch": 1.2295172413793103, "grad_norm": 3.693113088607788, "learning_rate": 8.987270802231727e-06, "loss": 0.6824, "step": 4457 }, { "epoch": 1.2297931034482759, "grad_norm": 4.011290550231934, "learning_rate": 8.986720273350108e-06, "loss": 0.954, "step": 4458 }, { "epoch": 1.2300689655172414, "grad_norm": 4.016030788421631, "learning_rate": 8.986169611744297e-06, "loss": 0.8998, "step": 4459 }, { "epoch": 1.2303448275862068, "grad_norm": 4.295276165008545, "learning_rate": 8.985618817432622e-06, "loss": 0.9239, "step": 4460 }, { "epoch": 1.2306206896551724, "grad_norm": 3.8384177684783936, "learning_rate": 8.985067890433419e-06, "loss": 0.7861, "step": 4461 }, { "epoch": 1.230896551724138, "grad_norm": 4.162559509277344, "learning_rate": 8.984516830765033e-06, "loss": 1.0168, "step": 4462 }, { "epoch": 1.2311724137931035, "grad_norm": 3.6268250942230225, "learning_rate": 8.983965638445807e-06, "loss": 0.8736, "step": 4463 }, { "epoch": 1.231448275862069, "grad_norm": 3.96224045753479, "learning_rate": 8.98341431349409e-06, "loss": 0.7671, "step": 4464 }, { "epoch": 1.2317241379310344, "grad_norm": 3.571387767791748, "learning_rate": 8.982862855928238e-06, "loss": 0.7976, "step": 4465 }, { "epoch": 1.232, "grad_norm": 3.9569716453552246, "learning_rate": 8.982311265766611e-06, "loss": 0.8123, "step": 4466 }, { "epoch": 1.2322758620689656, "grad_norm": 3.5959906578063965, "learning_rate": 8.981759543027571e-06, "loss": 0.7463, "step": 4467 }, { "epoch": 1.2325517241379311, "grad_norm": 4.313858509063721, "learning_rate": 8.981207687729488e-06, "loss": 0.8233, "step": 4468 }, { "epoch": 1.2328275862068965, "grad_norm": 4.026139736175537, "learning_rate": 8.980655699890728e-06, "loss": 0.9182, "step": 4469 }, { "epoch": 1.233103448275862, "grad_norm": 4.034270286560059, "learning_rate": 8.980103579529674e-06, "loss": 0.851, "step": 4470 }, { "epoch": 1.2333793103448276, "grad_norm": 4.0081377029418945, "learning_rate": 8.979551326664704e-06, "loss": 0.7892, "step": 4471 }, { "epoch": 1.2336551724137932, "grad_norm": 3.5340404510498047, "learning_rate": 8.978998941314205e-06, "loss": 0.8633, "step": 4472 }, { "epoch": 1.2339310344827585, "grad_norm": 4.209412097930908, "learning_rate": 8.978446423496564e-06, "loss": 0.8731, "step": 4473 }, { "epoch": 1.234206896551724, "grad_norm": 3.74753737449646, "learning_rate": 8.977893773230179e-06, "loss": 0.8003, "step": 4474 }, { "epoch": 1.2344827586206897, "grad_norm": 3.631526231765747, "learning_rate": 8.977340990533445e-06, "loss": 0.784, "step": 4475 }, { "epoch": 1.2347586206896553, "grad_norm": 4.203005313873291, "learning_rate": 8.976788075424768e-06, "loss": 0.8152, "step": 4476 }, { "epoch": 1.2350344827586206, "grad_norm": 4.215470314025879, "learning_rate": 8.976235027922553e-06, "loss": 0.8176, "step": 4477 }, { "epoch": 1.2353103448275862, "grad_norm": 3.988636016845703, "learning_rate": 8.975681848045212e-06, "loss": 0.7071, "step": 4478 }, { "epoch": 1.2355862068965517, "grad_norm": 3.824709892272949, "learning_rate": 8.975128535811162e-06, "loss": 0.7935, "step": 4479 }, { "epoch": 1.2358620689655173, "grad_norm": 3.7739078998565674, "learning_rate": 8.974575091238823e-06, "loss": 0.8671, "step": 4480 }, { "epoch": 1.2361379310344827, "grad_norm": 3.687368154525757, "learning_rate": 8.974021514346622e-06, "loss": 0.7795, "step": 4481 }, { "epoch": 1.2364137931034482, "grad_norm": 3.87721848487854, "learning_rate": 8.973467805152988e-06, "loss": 0.7864, "step": 4482 }, { "epoch": 1.2366896551724138, "grad_norm": 4.413524150848389, "learning_rate": 8.972913963676352e-06, "loss": 1.0043, "step": 4483 }, { "epoch": 1.2369655172413794, "grad_norm": 3.8205347061157227, "learning_rate": 8.972359989935156e-06, "loss": 0.7088, "step": 4484 }, { "epoch": 1.237241379310345, "grad_norm": 4.337759017944336, "learning_rate": 8.971805883947839e-06, "loss": 0.8327, "step": 4485 }, { "epoch": 1.2375172413793103, "grad_norm": 3.860689401626587, "learning_rate": 8.971251645732851e-06, "loss": 0.8149, "step": 4486 }, { "epoch": 1.2377931034482759, "grad_norm": 3.9955952167510986, "learning_rate": 8.970697275308643e-06, "loss": 0.8636, "step": 4487 }, { "epoch": 1.2380689655172414, "grad_norm": 3.799318790435791, "learning_rate": 8.970142772693669e-06, "loss": 0.8337, "step": 4488 }, { "epoch": 1.2383448275862068, "grad_norm": 3.4355034828186035, "learning_rate": 8.969588137906391e-06, "loss": 0.6828, "step": 4489 }, { "epoch": 1.2386206896551724, "grad_norm": 4.037051677703857, "learning_rate": 8.969033370965273e-06, "loss": 0.8604, "step": 4490 }, { "epoch": 1.238896551724138, "grad_norm": 4.140346527099609, "learning_rate": 8.968478471888784e-06, "loss": 0.9349, "step": 4491 }, { "epoch": 1.2391724137931035, "grad_norm": 4.1202616691589355, "learning_rate": 8.967923440695398e-06, "loss": 0.8599, "step": 4492 }, { "epoch": 1.239448275862069, "grad_norm": 4.093826770782471, "learning_rate": 8.967368277403592e-06, "loss": 0.7946, "step": 4493 }, { "epoch": 1.2397241379310344, "grad_norm": 4.360342502593994, "learning_rate": 8.96681298203185e-06, "loss": 0.8821, "step": 4494 }, { "epoch": 1.24, "grad_norm": 3.561156749725342, "learning_rate": 8.966257554598657e-06, "loss": 0.824, "step": 4495 }, { "epoch": 1.2402758620689656, "grad_norm": 4.259415626525879, "learning_rate": 8.965701995122505e-06, "loss": 0.8328, "step": 4496 }, { "epoch": 1.240551724137931, "grad_norm": 3.6998753547668457, "learning_rate": 8.965146303621888e-06, "loss": 0.7644, "step": 4497 }, { "epoch": 1.2408275862068965, "grad_norm": 3.345297336578369, "learning_rate": 8.964590480115306e-06, "loss": 0.7487, "step": 4498 }, { "epoch": 1.241103448275862, "grad_norm": 3.788738489151001, "learning_rate": 8.964034524621268e-06, "loss": 0.7676, "step": 4499 }, { "epoch": 1.2413793103448276, "grad_norm": 3.8235645294189453, "learning_rate": 8.963478437158274e-06, "loss": 0.9106, "step": 4500 }, { "epoch": 1.2413793103448276, "eval_loss": 1.2816946506500244, "eval_runtime": 13.8044, "eval_samples_per_second": 28.976, "eval_steps_per_second": 3.622, "step": 4500 }, { "epoch": 1.2416551724137932, "grad_norm": 4.227447986602783, "learning_rate": 8.962922217744844e-06, "loss": 0.8205, "step": 4501 }, { "epoch": 1.2419310344827585, "grad_norm": 3.676889419555664, "learning_rate": 8.962365866399493e-06, "loss": 0.7713, "step": 4502 }, { "epoch": 1.2422068965517241, "grad_norm": 3.7375550270080566, "learning_rate": 8.961809383140745e-06, "loss": 0.8328, "step": 4503 }, { "epoch": 1.2424827586206897, "grad_norm": 3.8108179569244385, "learning_rate": 8.961252767987122e-06, "loss": 0.7866, "step": 4504 }, { "epoch": 1.2427586206896553, "grad_norm": 4.095095157623291, "learning_rate": 8.960696020957157e-06, "loss": 0.9082, "step": 4505 }, { "epoch": 1.2430344827586206, "grad_norm": 3.8897149562835693, "learning_rate": 8.960139142069385e-06, "loss": 0.8277, "step": 4506 }, { "epoch": 1.2433103448275862, "grad_norm": 3.810572862625122, "learning_rate": 8.959582131342344e-06, "loss": 0.9644, "step": 4507 }, { "epoch": 1.2435862068965517, "grad_norm": 4.4946417808532715, "learning_rate": 8.959024988794581e-06, "loss": 0.8129, "step": 4508 }, { "epoch": 1.2438620689655173, "grad_norm": 3.88026762008667, "learning_rate": 8.958467714444641e-06, "loss": 0.8534, "step": 4509 }, { "epoch": 1.2441379310344827, "grad_norm": 3.8957395553588867, "learning_rate": 8.957910308311077e-06, "loss": 0.7998, "step": 4510 }, { "epoch": 1.2444137931034482, "grad_norm": 5.014124393463135, "learning_rate": 8.957352770412446e-06, "loss": 0.8925, "step": 4511 }, { "epoch": 1.2446896551724138, "grad_norm": 4.016829013824463, "learning_rate": 8.95679510076731e-06, "loss": 0.7952, "step": 4512 }, { "epoch": 1.2449655172413794, "grad_norm": 4.060423851013184, "learning_rate": 8.956237299394238e-06, "loss": 0.8679, "step": 4513 }, { "epoch": 1.2452413793103447, "grad_norm": 3.834303140640259, "learning_rate": 8.95567936631179e-06, "loss": 0.7523, "step": 4514 }, { "epoch": 1.2455172413793103, "grad_norm": 3.9956629276275635, "learning_rate": 8.955121301538553e-06, "loss": 0.7478, "step": 4515 }, { "epoch": 1.2457931034482759, "grad_norm": 3.906611680984497, "learning_rate": 8.954563105093098e-06, "loss": 0.632, "step": 4516 }, { "epoch": 1.2460689655172414, "grad_norm": 3.8418335914611816, "learning_rate": 8.954004776994009e-06, "loss": 0.7644, "step": 4517 }, { "epoch": 1.2463448275862068, "grad_norm": 4.58188533782959, "learning_rate": 8.953446317259877e-06, "loss": 0.9175, "step": 4518 }, { "epoch": 1.2466206896551724, "grad_norm": 4.220447540283203, "learning_rate": 8.95288772590929e-06, "loss": 0.9068, "step": 4519 }, { "epoch": 1.246896551724138, "grad_norm": 3.992619037628174, "learning_rate": 8.952329002960846e-06, "loss": 0.7809, "step": 4520 }, { "epoch": 1.2471724137931035, "grad_norm": 4.36971378326416, "learning_rate": 8.951770148433148e-06, "loss": 0.876, "step": 4521 }, { "epoch": 1.247448275862069, "grad_norm": 3.9332313537597656, "learning_rate": 8.951211162344798e-06, "loss": 0.7791, "step": 4522 }, { "epoch": 1.2477241379310344, "grad_norm": 4.239523887634277, "learning_rate": 8.950652044714408e-06, "loss": 0.8295, "step": 4523 }, { "epoch": 1.248, "grad_norm": 3.7605631351470947, "learning_rate": 8.950092795560587e-06, "loss": 0.8364, "step": 4524 }, { "epoch": 1.2482758620689656, "grad_norm": 3.9673802852630615, "learning_rate": 8.94953341490196e-06, "loss": 0.7432, "step": 4525 }, { "epoch": 1.248551724137931, "grad_norm": 4.10199499130249, "learning_rate": 8.948973902757146e-06, "loss": 0.84, "step": 4526 }, { "epoch": 1.2488275862068965, "grad_norm": 4.193236351013184, "learning_rate": 8.94841425914477e-06, "loss": 0.9054, "step": 4527 }, { "epoch": 1.249103448275862, "grad_norm": 3.7848830223083496, "learning_rate": 8.947854484083468e-06, "loss": 0.9043, "step": 4528 }, { "epoch": 1.2493793103448276, "grad_norm": 3.679241418838501, "learning_rate": 8.947294577591874e-06, "loss": 0.7759, "step": 4529 }, { "epoch": 1.2496551724137932, "grad_norm": 3.9213390350341797, "learning_rate": 8.946734539688628e-06, "loss": 0.7374, "step": 4530 }, { "epoch": 1.2499310344827586, "grad_norm": 4.01970911026001, "learning_rate": 8.946174370392376e-06, "loss": 0.845, "step": 4531 }, { "epoch": 1.2502068965517241, "grad_norm": 3.7874550819396973, "learning_rate": 8.945614069721761e-06, "loss": 0.8048, "step": 4532 }, { "epoch": 1.2504827586206897, "grad_norm": 3.9122631549835205, "learning_rate": 8.945053637695444e-06, "loss": 0.7629, "step": 4533 }, { "epoch": 1.250758620689655, "grad_norm": 3.9283909797668457, "learning_rate": 8.944493074332079e-06, "loss": 0.7806, "step": 4534 }, { "epoch": 1.2510344827586206, "grad_norm": 4.0136895179748535, "learning_rate": 8.943932379650328e-06, "loss": 0.7846, "step": 4535 }, { "epoch": 1.2513103448275862, "grad_norm": 4.162336826324463, "learning_rate": 8.943371553668856e-06, "loss": 0.8035, "step": 4536 }, { "epoch": 1.2515862068965518, "grad_norm": 3.4894165992736816, "learning_rate": 8.942810596406338e-06, "loss": 0.7488, "step": 4537 }, { "epoch": 1.2518620689655173, "grad_norm": 4.213534832000732, "learning_rate": 8.942249507881446e-06, "loss": 0.8782, "step": 4538 }, { "epoch": 1.2521379310344827, "grad_norm": 4.282659530639648, "learning_rate": 8.94168828811286e-06, "loss": 0.8296, "step": 4539 }, { "epoch": 1.2524137931034482, "grad_norm": 3.647909641265869, "learning_rate": 8.941126937119264e-06, "loss": 0.795, "step": 4540 }, { "epoch": 1.2526896551724138, "grad_norm": 4.235429286956787, "learning_rate": 8.940565454919346e-06, "loss": 0.8468, "step": 4541 }, { "epoch": 1.2529655172413794, "grad_norm": 3.881979465484619, "learning_rate": 8.9400038415318e-06, "loss": 0.845, "step": 4542 }, { "epoch": 1.253241379310345, "grad_norm": 4.007460117340088, "learning_rate": 8.939442096975321e-06, "loss": 0.8527, "step": 4543 }, { "epoch": 1.2535172413793103, "grad_norm": 3.9860730171203613, "learning_rate": 8.938880221268611e-06, "loss": 0.7057, "step": 4544 }, { "epoch": 1.2537931034482759, "grad_norm": 3.7831904888153076, "learning_rate": 8.938318214430378e-06, "loss": 0.8034, "step": 4545 }, { "epoch": 1.2540689655172415, "grad_norm": 3.7031965255737305, "learning_rate": 8.937756076479329e-06, "loss": 0.8481, "step": 4546 }, { "epoch": 1.2543448275862068, "grad_norm": 3.7704343795776367, "learning_rate": 8.93719380743418e-06, "loss": 0.8336, "step": 4547 }, { "epoch": 1.2546206896551724, "grad_norm": 3.9671883583068848, "learning_rate": 8.936631407313648e-06, "loss": 0.8044, "step": 4548 }, { "epoch": 1.254896551724138, "grad_norm": 3.6600382328033447, "learning_rate": 8.936068876136459e-06, "loss": 0.7893, "step": 4549 }, { "epoch": 1.2551724137931035, "grad_norm": 3.8628416061401367, "learning_rate": 8.93550621392134e-06, "loss": 0.7614, "step": 4550 }, { "epoch": 1.255448275862069, "grad_norm": 3.7260029315948486, "learning_rate": 8.934943420687022e-06, "loss": 0.8427, "step": 4551 }, { "epoch": 1.2557241379310344, "grad_norm": 4.001220226287842, "learning_rate": 8.934380496452238e-06, "loss": 0.7642, "step": 4552 }, { "epoch": 1.256, "grad_norm": 4.374683380126953, "learning_rate": 8.933817441235734e-06, "loss": 0.8277, "step": 4553 }, { "epoch": 1.2562758620689656, "grad_norm": 4.116793155670166, "learning_rate": 8.933254255056254e-06, "loss": 0.9126, "step": 4554 }, { "epoch": 1.256551724137931, "grad_norm": 3.9582812786102295, "learning_rate": 8.932690937932547e-06, "loss": 0.8672, "step": 4555 }, { "epoch": 1.2568275862068965, "grad_norm": 3.9386682510375977, "learning_rate": 8.932127489883364e-06, "loss": 0.716, "step": 4556 }, { "epoch": 1.257103448275862, "grad_norm": 3.8267908096313477, "learning_rate": 8.931563910927466e-06, "loss": 0.8295, "step": 4557 }, { "epoch": 1.2573793103448276, "grad_norm": 3.620615243911743, "learning_rate": 8.931000201083616e-06, "loss": 0.7246, "step": 4558 }, { "epoch": 1.2576551724137932, "grad_norm": 3.9701454639434814, "learning_rate": 8.930436360370578e-06, "loss": 0.7105, "step": 4559 }, { "epoch": 1.2579310344827586, "grad_norm": 3.8777923583984375, "learning_rate": 8.929872388807124e-06, "loss": 0.8904, "step": 4560 }, { "epoch": 1.2582068965517241, "grad_norm": 3.6315712928771973, "learning_rate": 8.929308286412029e-06, "loss": 0.8338, "step": 4561 }, { "epoch": 1.2584827586206897, "grad_norm": 4.275798797607422, "learning_rate": 8.928744053204076e-06, "loss": 0.7945, "step": 4562 }, { "epoch": 1.258758620689655, "grad_norm": 3.8442089557647705, "learning_rate": 8.928179689202047e-06, "loss": 0.846, "step": 4563 }, { "epoch": 1.2590344827586206, "grad_norm": 3.51608943939209, "learning_rate": 8.92761519442473e-06, "loss": 0.7452, "step": 4564 }, { "epoch": 1.2593103448275862, "grad_norm": 4.35458517074585, "learning_rate": 8.927050568890918e-06, "loss": 0.8304, "step": 4565 }, { "epoch": 1.2595862068965518, "grad_norm": 3.988422393798828, "learning_rate": 8.92648581261941e-06, "loss": 0.8741, "step": 4566 }, { "epoch": 1.2598620689655173, "grad_norm": 3.9659876823425293, "learning_rate": 8.925920925629006e-06, "loss": 0.9133, "step": 4567 }, { "epoch": 1.2601379310344827, "grad_norm": 3.797848701477051, "learning_rate": 8.92535590793851e-06, "loss": 0.8102, "step": 4568 }, { "epoch": 1.2604137931034483, "grad_norm": 4.245837211608887, "learning_rate": 8.92479075956674e-06, "loss": 0.9245, "step": 4569 }, { "epoch": 1.2606896551724138, "grad_norm": 4.173982620239258, "learning_rate": 8.924225480532501e-06, "loss": 0.8332, "step": 4570 }, { "epoch": 1.2609655172413792, "grad_norm": 4.041015625, "learning_rate": 8.923660070854619e-06, "loss": 0.8498, "step": 4571 }, { "epoch": 1.2612413793103447, "grad_norm": 3.9381721019744873, "learning_rate": 8.923094530551914e-06, "loss": 0.8591, "step": 4572 }, { "epoch": 1.2615172413793103, "grad_norm": 3.8143162727355957, "learning_rate": 8.922528859643214e-06, "loss": 0.7389, "step": 4573 }, { "epoch": 1.2617931034482759, "grad_norm": 3.7508420944213867, "learning_rate": 8.921963058147352e-06, "loss": 0.894, "step": 4574 }, { "epoch": 1.2620689655172415, "grad_norm": 3.5042648315429688, "learning_rate": 8.921397126083164e-06, "loss": 0.8019, "step": 4575 }, { "epoch": 1.2623448275862068, "grad_norm": 3.9635188579559326, "learning_rate": 8.92083106346949e-06, "loss": 0.7383, "step": 4576 }, { "epoch": 1.2626206896551724, "grad_norm": 3.7491366863250732, "learning_rate": 8.920264870325177e-06, "loss": 0.7822, "step": 4577 }, { "epoch": 1.262896551724138, "grad_norm": 3.8344948291778564, "learning_rate": 8.919698546669074e-06, "loss": 0.8179, "step": 4578 }, { "epoch": 1.2631724137931035, "grad_norm": 3.8722355365753174, "learning_rate": 8.919132092520035e-06, "loss": 0.8087, "step": 4579 }, { "epoch": 1.263448275862069, "grad_norm": 4.148128509521484, "learning_rate": 8.918565507896914e-06, "loss": 0.8688, "step": 4580 }, { "epoch": 1.2637241379310344, "grad_norm": 4.029916763305664, "learning_rate": 8.91799879281858e-06, "loss": 0.8679, "step": 4581 }, { "epoch": 1.264, "grad_norm": 4.088106632232666, "learning_rate": 8.917431947303896e-06, "loss": 0.7511, "step": 4582 }, { "epoch": 1.2642758620689656, "grad_norm": 4.497491836547852, "learning_rate": 8.916864971371733e-06, "loss": 0.8704, "step": 4583 }, { "epoch": 1.264551724137931, "grad_norm": 3.793830394744873, "learning_rate": 8.916297865040967e-06, "loss": 0.753, "step": 4584 }, { "epoch": 1.2648275862068965, "grad_norm": 3.607983350753784, "learning_rate": 8.91573062833048e-06, "loss": 0.7534, "step": 4585 }, { "epoch": 1.265103448275862, "grad_norm": 3.9206321239471436, "learning_rate": 8.915163261259155e-06, "loss": 0.8144, "step": 4586 }, { "epoch": 1.2653793103448276, "grad_norm": 3.64449405670166, "learning_rate": 8.914595763845876e-06, "loss": 0.7878, "step": 4587 }, { "epoch": 1.2656551724137932, "grad_norm": 3.6125152111053467, "learning_rate": 8.914028136109544e-06, "loss": 0.726, "step": 4588 }, { "epoch": 1.2659310344827586, "grad_norm": 4.006420612335205, "learning_rate": 8.91346037806905e-06, "loss": 0.8239, "step": 4589 }, { "epoch": 1.2662068965517241, "grad_norm": 3.4065933227539062, "learning_rate": 8.912892489743299e-06, "loss": 0.7046, "step": 4590 }, { "epoch": 1.2664827586206897, "grad_norm": 3.7004799842834473, "learning_rate": 8.912324471151194e-06, "loss": 0.7233, "step": 4591 }, { "epoch": 1.266758620689655, "grad_norm": 4.0474066734313965, "learning_rate": 8.911756322311649e-06, "loss": 0.7591, "step": 4592 }, { "epoch": 1.2670344827586206, "grad_norm": 4.337769985198975, "learning_rate": 8.911188043243576e-06, "loss": 0.8035, "step": 4593 }, { "epoch": 1.2673103448275862, "grad_norm": 4.677688121795654, "learning_rate": 8.910619633965893e-06, "loss": 0.9372, "step": 4594 }, { "epoch": 1.2675862068965518, "grad_norm": 3.940885543823242, "learning_rate": 8.910051094497525e-06, "loss": 0.718, "step": 4595 }, { "epoch": 1.2678620689655173, "grad_norm": 4.316466331481934, "learning_rate": 8.909482424857399e-06, "loss": 0.821, "step": 4596 }, { "epoch": 1.2681379310344827, "grad_norm": 3.986860752105713, "learning_rate": 8.908913625064447e-06, "loss": 0.7648, "step": 4597 }, { "epoch": 1.2684137931034483, "grad_norm": 4.3839569091796875, "learning_rate": 8.908344695137605e-06, "loss": 0.7877, "step": 4598 }, { "epoch": 1.2686896551724138, "grad_norm": 4.21952486038208, "learning_rate": 8.907775635095815e-06, "loss": 0.7911, "step": 4599 }, { "epoch": 1.2689655172413792, "grad_norm": 4.40912389755249, "learning_rate": 8.90720644495802e-06, "loss": 0.8118, "step": 4600 }, { "epoch": 1.2692413793103448, "grad_norm": 4.027214050292969, "learning_rate": 8.906637124743169e-06, "loss": 0.8826, "step": 4601 }, { "epoch": 1.2695172413793103, "grad_norm": 3.963947296142578, "learning_rate": 8.906067674470217e-06, "loss": 0.8099, "step": 4602 }, { "epoch": 1.269793103448276, "grad_norm": 4.160227298736572, "learning_rate": 8.905498094158123e-06, "loss": 0.8311, "step": 4603 }, { "epoch": 1.2700689655172415, "grad_norm": 3.3866987228393555, "learning_rate": 8.904928383825846e-06, "loss": 0.7206, "step": 4604 }, { "epoch": 1.2703448275862068, "grad_norm": 3.6529529094696045, "learning_rate": 8.904358543492356e-06, "loss": 0.8738, "step": 4605 }, { "epoch": 1.2706206896551724, "grad_norm": 4.302645206451416, "learning_rate": 8.90378857317662e-06, "loss": 0.9099, "step": 4606 }, { "epoch": 1.270896551724138, "grad_norm": 3.898042678833008, "learning_rate": 8.903218472897618e-06, "loss": 0.7821, "step": 4607 }, { "epoch": 1.2711724137931035, "grad_norm": 3.6114542484283447, "learning_rate": 8.902648242674322e-06, "loss": 0.6859, "step": 4608 }, { "epoch": 1.271448275862069, "grad_norm": 3.3973443508148193, "learning_rate": 8.902077882525724e-06, "loss": 0.6538, "step": 4609 }, { "epoch": 1.2717241379310344, "grad_norm": 3.5365233421325684, "learning_rate": 8.901507392470809e-06, "loss": 0.8429, "step": 4610 }, { "epoch": 1.272, "grad_norm": 4.259336471557617, "learning_rate": 8.90093677252857e-06, "loss": 0.9409, "step": 4611 }, { "epoch": 1.2722758620689656, "grad_norm": 3.696953773498535, "learning_rate": 8.900366022718e-06, "loss": 0.7486, "step": 4612 }, { "epoch": 1.272551724137931, "grad_norm": 3.7254714965820312, "learning_rate": 8.899795143058107e-06, "loss": 0.7897, "step": 4613 }, { "epoch": 1.2728275862068965, "grad_norm": 3.9153802394866943, "learning_rate": 8.899224133567892e-06, "loss": 0.72, "step": 4614 }, { "epoch": 1.273103448275862, "grad_norm": 4.168178081512451, "learning_rate": 8.898652994266366e-06, "loss": 0.8409, "step": 4615 }, { "epoch": 1.2733793103448277, "grad_norm": 3.9889204502105713, "learning_rate": 8.89808172517254e-06, "loss": 0.8501, "step": 4616 }, { "epoch": 1.2736551724137932, "grad_norm": 4.253990650177002, "learning_rate": 8.897510326305438e-06, "loss": 0.8613, "step": 4617 }, { "epoch": 1.2739310344827586, "grad_norm": 4.009179592132568, "learning_rate": 8.89693879768408e-06, "loss": 0.9009, "step": 4618 }, { "epoch": 1.2742068965517241, "grad_norm": 3.8429079055786133, "learning_rate": 8.896367139327492e-06, "loss": 0.6807, "step": 4619 }, { "epoch": 1.2744827586206897, "grad_norm": 3.954899311065674, "learning_rate": 8.895795351254707e-06, "loss": 0.7262, "step": 4620 }, { "epoch": 1.274758620689655, "grad_norm": 3.803426504135132, "learning_rate": 8.895223433484761e-06, "loss": 0.687, "step": 4621 }, { "epoch": 1.2750344827586206, "grad_norm": 3.882667303085327, "learning_rate": 8.894651386036691e-06, "loss": 0.8048, "step": 4622 }, { "epoch": 1.2753103448275862, "grad_norm": 3.6861112117767334, "learning_rate": 8.894079208929544e-06, "loss": 0.7592, "step": 4623 }, { "epoch": 1.2755862068965518, "grad_norm": 3.688056468963623, "learning_rate": 8.893506902182372e-06, "loss": 0.8168, "step": 4624 }, { "epoch": 1.2758620689655173, "grad_norm": 3.638127088546753, "learning_rate": 8.89293446581422e-06, "loss": 0.8468, "step": 4625 }, { "epoch": 1.2761379310344827, "grad_norm": 3.5084400177001953, "learning_rate": 8.892361899844152e-06, "loss": 0.7509, "step": 4626 }, { "epoch": 1.2764137931034483, "grad_norm": 3.966891288757324, "learning_rate": 8.891789204291226e-06, "loss": 0.9396, "step": 4627 }, { "epoch": 1.2766896551724138, "grad_norm": 3.7980728149414062, "learning_rate": 8.89121637917451e-06, "loss": 0.7744, "step": 4628 }, { "epoch": 1.2769655172413792, "grad_norm": 3.751487970352173, "learning_rate": 8.890643424513074e-06, "loss": 0.7615, "step": 4629 }, { "epoch": 1.2772413793103448, "grad_norm": 4.2211198806762695, "learning_rate": 8.890070340325991e-06, "loss": 0.9164, "step": 4630 }, { "epoch": 1.2775172413793103, "grad_norm": 4.045146465301514, "learning_rate": 8.88949712663234e-06, "loss": 0.7216, "step": 4631 }, { "epoch": 1.277793103448276, "grad_norm": 4.027627468109131, "learning_rate": 8.888923783451207e-06, "loss": 0.7167, "step": 4632 }, { "epoch": 1.2780689655172415, "grad_norm": 3.566089153289795, "learning_rate": 8.888350310801677e-06, "loss": 0.626, "step": 4633 }, { "epoch": 1.2783448275862068, "grad_norm": 3.8093481063842773, "learning_rate": 8.887776708702842e-06, "loss": 0.8177, "step": 4634 }, { "epoch": 1.2786206896551724, "grad_norm": 4.088308811187744, "learning_rate": 8.887202977173799e-06, "loss": 0.8313, "step": 4635 }, { "epoch": 1.278896551724138, "grad_norm": 4.167995929718018, "learning_rate": 8.886629116233647e-06, "loss": 0.7996, "step": 4636 }, { "epoch": 1.2791724137931033, "grad_norm": 3.76149320602417, "learning_rate": 8.886055125901491e-06, "loss": 0.7677, "step": 4637 }, { "epoch": 1.2794482758620689, "grad_norm": 3.4873034954071045, "learning_rate": 8.885481006196442e-06, "loss": 0.7986, "step": 4638 }, { "epoch": 1.2797241379310345, "grad_norm": 3.983670711517334, "learning_rate": 8.884906757137612e-06, "loss": 0.888, "step": 4639 }, { "epoch": 1.28, "grad_norm": 3.6615002155303955, "learning_rate": 8.88433237874412e-06, "loss": 0.7543, "step": 4640 }, { "epoch": 1.2802758620689656, "grad_norm": 3.984882354736328, "learning_rate": 8.883757871035083e-06, "loss": 0.7246, "step": 4641 }, { "epoch": 1.280551724137931, "grad_norm": 4.03247594833374, "learning_rate": 8.883183234029635e-06, "loss": 0.9678, "step": 4642 }, { "epoch": 1.2808275862068965, "grad_norm": 4.0288214683532715, "learning_rate": 8.8826084677469e-06, "loss": 0.87, "step": 4643 }, { "epoch": 1.281103448275862, "grad_norm": 3.909496307373047, "learning_rate": 8.882033572206017e-06, "loss": 0.7727, "step": 4644 }, { "epoch": 1.2813793103448277, "grad_norm": 3.6850759983062744, "learning_rate": 8.881458547426121e-06, "loss": 0.7372, "step": 4645 }, { "epoch": 1.2816551724137932, "grad_norm": 4.042511463165283, "learning_rate": 8.88088339342636e-06, "loss": 0.8168, "step": 4646 }, { "epoch": 1.2819310344827586, "grad_norm": 4.405542373657227, "learning_rate": 8.880308110225879e-06, "loss": 0.7797, "step": 4647 }, { "epoch": 1.2822068965517242, "grad_norm": 4.02631950378418, "learning_rate": 8.879732697843831e-06, "loss": 0.7595, "step": 4648 }, { "epoch": 1.2824827586206897, "grad_norm": 3.7483203411102295, "learning_rate": 8.879157156299373e-06, "loss": 0.802, "step": 4649 }, { "epoch": 1.282758620689655, "grad_norm": 4.367844581604004, "learning_rate": 8.878581485611664e-06, "loss": 0.8499, "step": 4650 }, { "epoch": 1.2830344827586206, "grad_norm": 4.225892066955566, "learning_rate": 8.87800568579987e-06, "loss": 0.8252, "step": 4651 }, { "epoch": 1.2833103448275862, "grad_norm": 4.027849197387695, "learning_rate": 8.877429756883161e-06, "loss": 0.7103, "step": 4652 }, { "epoch": 1.2835862068965518, "grad_norm": 3.831085443496704, "learning_rate": 8.87685369888071e-06, "loss": 0.73, "step": 4653 }, { "epoch": 1.2838620689655174, "grad_norm": 3.76352858543396, "learning_rate": 8.876277511811693e-06, "loss": 0.6767, "step": 4654 }, { "epoch": 1.2841379310344827, "grad_norm": 3.421617031097412, "learning_rate": 8.875701195695297e-06, "loss": 0.7976, "step": 4655 }, { "epoch": 1.2844137931034483, "grad_norm": 4.20257043838501, "learning_rate": 8.875124750550702e-06, "loss": 0.9374, "step": 4656 }, { "epoch": 1.2846896551724138, "grad_norm": 4.318879127502441, "learning_rate": 8.874548176397104e-06, "loss": 0.8928, "step": 4657 }, { "epoch": 1.2849655172413792, "grad_norm": 4.094013690948486, "learning_rate": 8.873971473253695e-06, "loss": 0.7318, "step": 4658 }, { "epoch": 1.2852413793103448, "grad_norm": 4.305492877960205, "learning_rate": 8.873394641139676e-06, "loss": 0.8611, "step": 4659 }, { "epoch": 1.2855172413793103, "grad_norm": 3.931356430053711, "learning_rate": 8.87281768007425e-06, "loss": 0.8061, "step": 4660 }, { "epoch": 1.285793103448276, "grad_norm": 4.764166355133057, "learning_rate": 8.872240590076625e-06, "loss": 0.9169, "step": 4661 }, { "epoch": 1.2860689655172415, "grad_norm": 4.360660552978516, "learning_rate": 8.871663371166015e-06, "loss": 1.0106, "step": 4662 }, { "epoch": 1.2863448275862068, "grad_norm": 3.8884384632110596, "learning_rate": 8.871086023361632e-06, "loss": 0.806, "step": 4663 }, { "epoch": 1.2866206896551724, "grad_norm": 3.8651843070983887, "learning_rate": 8.8705085466827e-06, "loss": 0.7663, "step": 4664 }, { "epoch": 1.286896551724138, "grad_norm": 3.832667350769043, "learning_rate": 8.869930941148445e-06, "loss": 0.7916, "step": 4665 }, { "epoch": 1.2871724137931033, "grad_norm": 3.945230722427368, "learning_rate": 8.869353206778096e-06, "loss": 0.9035, "step": 4666 }, { "epoch": 1.287448275862069, "grad_norm": 4.117520809173584, "learning_rate": 8.868775343590886e-06, "loss": 0.8559, "step": 4667 }, { "epoch": 1.2877241379310345, "grad_norm": 3.8783860206604004, "learning_rate": 8.868197351606052e-06, "loss": 0.7569, "step": 4668 }, { "epoch": 1.288, "grad_norm": 3.913349151611328, "learning_rate": 8.867619230842836e-06, "loss": 0.7638, "step": 4669 }, { "epoch": 1.2882758620689656, "grad_norm": 4.1995062828063965, "learning_rate": 8.867040981320488e-06, "loss": 0.8743, "step": 4670 }, { "epoch": 1.288551724137931, "grad_norm": 4.12032413482666, "learning_rate": 8.866462603058254e-06, "loss": 0.9082, "step": 4671 }, { "epoch": 1.2888275862068965, "grad_norm": 3.7268710136413574, "learning_rate": 8.865884096075392e-06, "loss": 0.7311, "step": 4672 }, { "epoch": 1.289103448275862, "grad_norm": 4.120909690856934, "learning_rate": 8.865305460391162e-06, "loss": 0.8871, "step": 4673 }, { "epoch": 1.2893793103448277, "grad_norm": 4.107940673828125, "learning_rate": 8.864726696024826e-06, "loss": 0.7789, "step": 4674 }, { "epoch": 1.2896551724137932, "grad_norm": 3.9212803840637207, "learning_rate": 8.864147802995654e-06, "loss": 0.8272, "step": 4675 }, { "epoch": 1.2899310344827586, "grad_norm": 4.208202838897705, "learning_rate": 8.863568781322916e-06, "loss": 0.7911, "step": 4676 }, { "epoch": 1.2902068965517242, "grad_norm": 4.283446788787842, "learning_rate": 8.86298963102589e-06, "loss": 0.851, "step": 4677 }, { "epoch": 1.2904827586206897, "grad_norm": 3.7918004989624023, "learning_rate": 8.862410352123855e-06, "loss": 0.8192, "step": 4678 }, { "epoch": 1.290758620689655, "grad_norm": 4.007400035858154, "learning_rate": 8.861830944636099e-06, "loss": 0.884, "step": 4679 }, { "epoch": 1.2910344827586206, "grad_norm": 3.9601855278015137, "learning_rate": 8.861251408581911e-06, "loss": 0.7176, "step": 4680 }, { "epoch": 1.2913103448275862, "grad_norm": 3.9576258659362793, "learning_rate": 8.86067174398058e-06, "loss": 0.9003, "step": 4681 }, { "epoch": 1.2915862068965518, "grad_norm": 4.0018391609191895, "learning_rate": 8.86009195085141e-06, "loss": 0.8203, "step": 4682 }, { "epoch": 1.2918620689655174, "grad_norm": 4.289032459259033, "learning_rate": 8.859512029213699e-06, "loss": 0.81, "step": 4683 }, { "epoch": 1.2921379310344827, "grad_norm": 3.895374298095703, "learning_rate": 8.858931979086755e-06, "loss": 0.9353, "step": 4684 }, { "epoch": 1.2924137931034483, "grad_norm": 3.9391086101531982, "learning_rate": 8.858351800489892e-06, "loss": 0.7404, "step": 4685 }, { "epoch": 1.2926896551724139, "grad_norm": 3.6126747131347656, "learning_rate": 8.85777149344242e-06, "loss": 0.8786, "step": 4686 }, { "epoch": 1.2929655172413792, "grad_norm": 3.988673448562622, "learning_rate": 8.857191057963658e-06, "loss": 0.9507, "step": 4687 }, { "epoch": 1.2932413793103448, "grad_norm": 3.3966007232666016, "learning_rate": 8.856610494072934e-06, "loss": 0.7848, "step": 4688 }, { "epoch": 1.2935172413793103, "grad_norm": 4.0002593994140625, "learning_rate": 8.856029801789576e-06, "loss": 0.7087, "step": 4689 }, { "epoch": 1.293793103448276, "grad_norm": 3.9837913513183594, "learning_rate": 8.855448981132913e-06, "loss": 0.73, "step": 4690 }, { "epoch": 1.2940689655172415, "grad_norm": 4.311925888061523, "learning_rate": 8.854868032122281e-06, "loss": 0.9818, "step": 4691 }, { "epoch": 1.2943448275862068, "grad_norm": 5.616240501403809, "learning_rate": 8.854286954777023e-06, "loss": 0.993, "step": 4692 }, { "epoch": 1.2946206896551724, "grad_norm": 3.5971457958221436, "learning_rate": 8.85370574911648e-06, "loss": 0.7855, "step": 4693 }, { "epoch": 1.294896551724138, "grad_norm": 4.319337368011475, "learning_rate": 8.85312441516001e-06, "loss": 0.8301, "step": 4694 }, { "epoch": 1.2951724137931033, "grad_norm": 3.847661256790161, "learning_rate": 8.852542952926957e-06, "loss": 0.7267, "step": 4695 }, { "epoch": 1.295448275862069, "grad_norm": 4.0631103515625, "learning_rate": 8.851961362436685e-06, "loss": 0.8632, "step": 4696 }, { "epoch": 1.2957241379310345, "grad_norm": 3.9469404220581055, "learning_rate": 8.85137964370855e-06, "loss": 0.8661, "step": 4697 }, { "epoch": 1.296, "grad_norm": 3.937213659286499, "learning_rate": 8.850797796761927e-06, "loss": 0.7552, "step": 4698 }, { "epoch": 1.2962758620689656, "grad_norm": 3.9192347526550293, "learning_rate": 8.85021582161618e-06, "loss": 0.7453, "step": 4699 }, { "epoch": 1.296551724137931, "grad_norm": 3.8876116275787354, "learning_rate": 8.849633718290684e-06, "loss": 0.7324, "step": 4700 }, { "epoch": 1.2968275862068965, "grad_norm": 4.17075252532959, "learning_rate": 8.849051486804821e-06, "loss": 0.8195, "step": 4701 }, { "epoch": 1.297103448275862, "grad_norm": 4.557609558105469, "learning_rate": 8.848469127177972e-06, "loss": 0.821, "step": 4702 }, { "epoch": 1.2973793103448275, "grad_norm": 3.5934481620788574, "learning_rate": 8.847886639429526e-06, "loss": 0.8507, "step": 4703 }, { "epoch": 1.297655172413793, "grad_norm": 3.9272003173828125, "learning_rate": 8.847304023578875e-06, "loss": 0.7494, "step": 4704 }, { "epoch": 1.2979310344827586, "grad_norm": 4.276572227478027, "learning_rate": 8.846721279645416e-06, "loss": 0.6841, "step": 4705 }, { "epoch": 1.2982068965517242, "grad_norm": 3.7393226623535156, "learning_rate": 8.846138407648547e-06, "loss": 0.8178, "step": 4706 }, { "epoch": 1.2984827586206897, "grad_norm": 3.9559526443481445, "learning_rate": 8.845555407607674e-06, "loss": 0.8327, "step": 4707 }, { "epoch": 1.298758620689655, "grad_norm": 3.629023551940918, "learning_rate": 8.844972279542207e-06, "loss": 0.7934, "step": 4708 }, { "epoch": 1.2990344827586207, "grad_norm": 3.9570088386535645, "learning_rate": 8.844389023471559e-06, "loss": 0.7775, "step": 4709 }, { "epoch": 1.2993103448275862, "grad_norm": 3.7625231742858887, "learning_rate": 8.843805639415143e-06, "loss": 0.7901, "step": 4710 }, { "epoch": 1.2995862068965518, "grad_norm": 4.025181293487549, "learning_rate": 8.843222127392389e-06, "loss": 0.906, "step": 4711 }, { "epoch": 1.2998620689655174, "grad_norm": 4.17596960067749, "learning_rate": 8.842638487422717e-06, "loss": 0.7348, "step": 4712 }, { "epoch": 1.3001379310344827, "grad_norm": 3.699242353439331, "learning_rate": 8.84205471952556e-06, "loss": 0.7233, "step": 4713 }, { "epoch": 1.3004137931034483, "grad_norm": 3.9326844215393066, "learning_rate": 8.841470823720349e-06, "loss": 0.8237, "step": 4714 }, { "epoch": 1.3006896551724139, "grad_norm": 4.773733615875244, "learning_rate": 8.840886800026527e-06, "loss": 0.9745, "step": 4715 }, { "epoch": 1.3009655172413792, "grad_norm": 3.592639207839966, "learning_rate": 8.840302648463536e-06, "loss": 0.7544, "step": 4716 }, { "epoch": 1.3012413793103448, "grad_norm": 4.076207160949707, "learning_rate": 8.839718369050822e-06, "loss": 0.8898, "step": 4717 }, { "epoch": 1.3015172413793104, "grad_norm": 4.088522434234619, "learning_rate": 8.839133961807836e-06, "loss": 0.8349, "step": 4718 }, { "epoch": 1.301793103448276, "grad_norm": 4.052059650421143, "learning_rate": 8.838549426754038e-06, "loss": 0.7433, "step": 4719 }, { "epoch": 1.3020689655172415, "grad_norm": 3.952308416366577, "learning_rate": 8.837964763908882e-06, "loss": 0.8138, "step": 4720 }, { "epoch": 1.3023448275862068, "grad_norm": 3.721081018447876, "learning_rate": 8.837379973291838e-06, "loss": 0.8646, "step": 4721 }, { "epoch": 1.3026206896551724, "grad_norm": 4.070348262786865, "learning_rate": 8.836795054922371e-06, "loss": 0.9003, "step": 4722 }, { "epoch": 1.302896551724138, "grad_norm": 3.919769048690796, "learning_rate": 8.836210008819955e-06, "loss": 0.6, "step": 4723 }, { "epoch": 1.3031724137931033, "grad_norm": 3.8112547397613525, "learning_rate": 8.835624835004068e-06, "loss": 0.7219, "step": 4724 }, { "epoch": 1.303448275862069, "grad_norm": 4.037749767303467, "learning_rate": 8.835039533494189e-06, "loss": 0.8171, "step": 4725 }, { "epoch": 1.3037241379310345, "grad_norm": 4.048585414886475, "learning_rate": 8.834454104309806e-06, "loss": 0.9766, "step": 4726 }, { "epoch": 1.304, "grad_norm": 3.8659541606903076, "learning_rate": 8.833868547470407e-06, "loss": 0.7162, "step": 4727 }, { "epoch": 1.3042758620689656, "grad_norm": 4.231717109680176, "learning_rate": 8.83328286299549e-06, "loss": 0.948, "step": 4728 }, { "epoch": 1.304551724137931, "grad_norm": 4.027687072753906, "learning_rate": 8.832697050904547e-06, "loss": 0.8977, "step": 4729 }, { "epoch": 1.3048275862068965, "grad_norm": 4.018397331237793, "learning_rate": 8.832111111217086e-06, "loss": 0.7821, "step": 4730 }, { "epoch": 1.305103448275862, "grad_norm": 4.465954780578613, "learning_rate": 8.831525043952611e-06, "loss": 0.8814, "step": 4731 }, { "epoch": 1.3053793103448275, "grad_norm": 3.761953115463257, "learning_rate": 8.830938849130634e-06, "loss": 0.784, "step": 4732 }, { "epoch": 1.305655172413793, "grad_norm": 3.581080198287964, "learning_rate": 8.83035252677067e-06, "loss": 0.6217, "step": 4733 }, { "epoch": 1.3059310344827586, "grad_norm": 4.1492156982421875, "learning_rate": 8.82976607689224e-06, "loss": 0.9527, "step": 4734 }, { "epoch": 1.3062068965517242, "grad_norm": 3.918924570083618, "learning_rate": 8.829179499514866e-06, "loss": 0.8046, "step": 4735 }, { "epoch": 1.3064827586206897, "grad_norm": 4.122922897338867, "learning_rate": 8.828592794658078e-06, "loss": 0.8241, "step": 4736 }, { "epoch": 1.306758620689655, "grad_norm": 3.7643790245056152, "learning_rate": 8.828005962341405e-06, "loss": 0.8053, "step": 4737 }, { "epoch": 1.3070344827586207, "grad_norm": 4.52841854095459, "learning_rate": 8.827419002584387e-06, "loss": 0.8632, "step": 4738 }, { "epoch": 1.3073103448275862, "grad_norm": 4.140122413635254, "learning_rate": 8.826831915406565e-06, "loss": 0.8015, "step": 4739 }, { "epoch": 1.3075862068965518, "grad_norm": 4.526918888092041, "learning_rate": 8.82624470082748e-06, "loss": 0.8518, "step": 4740 }, { "epoch": 1.3078620689655174, "grad_norm": 3.916872024536133, "learning_rate": 8.825657358866686e-06, "loss": 0.9136, "step": 4741 }, { "epoch": 1.3081379310344827, "grad_norm": 3.9576783180236816, "learning_rate": 8.825069889543733e-06, "loss": 0.865, "step": 4742 }, { "epoch": 1.3084137931034483, "grad_norm": 3.9420177936553955, "learning_rate": 8.82448229287818e-06, "loss": 0.8268, "step": 4743 }, { "epoch": 1.3086896551724139, "grad_norm": 4.309457778930664, "learning_rate": 8.82389456888959e-06, "loss": 0.8496, "step": 4744 }, { "epoch": 1.3089655172413792, "grad_norm": 3.760244131088257, "learning_rate": 8.82330671759753e-06, "loss": 0.7831, "step": 4745 }, { "epoch": 1.3092413793103448, "grad_norm": 4.13879919052124, "learning_rate": 8.822718739021568e-06, "loss": 0.8022, "step": 4746 }, { "epoch": 1.3095172413793104, "grad_norm": 3.972648859024048, "learning_rate": 8.822130633181279e-06, "loss": 0.7713, "step": 4747 }, { "epoch": 1.309793103448276, "grad_norm": 3.5413706302642822, "learning_rate": 8.821542400096241e-06, "loss": 0.6917, "step": 4748 }, { "epoch": 1.3100689655172415, "grad_norm": 4.224603652954102, "learning_rate": 8.820954039786042e-06, "loss": 0.8736, "step": 4749 }, { "epoch": 1.3103448275862069, "grad_norm": 4.000744342803955, "learning_rate": 8.820365552270264e-06, "loss": 0.8526, "step": 4750 }, { "epoch": 1.3106206896551724, "grad_norm": 3.7974541187286377, "learning_rate": 8.819776937568503e-06, "loss": 0.8136, "step": 4751 }, { "epoch": 1.310896551724138, "grad_norm": 3.7108943462371826, "learning_rate": 8.819188195700353e-06, "loss": 0.9327, "step": 4752 }, { "epoch": 1.3111724137931033, "grad_norm": 3.999199867248535, "learning_rate": 8.818599326685413e-06, "loss": 0.7474, "step": 4753 }, { "epoch": 1.311448275862069, "grad_norm": 4.41497802734375, "learning_rate": 8.81801033054329e-06, "loss": 0.7195, "step": 4754 }, { "epoch": 1.3117241379310345, "grad_norm": 4.185975074768066, "learning_rate": 8.817421207293588e-06, "loss": 0.8215, "step": 4755 }, { "epoch": 1.312, "grad_norm": 4.325428009033203, "learning_rate": 8.816831956955926e-06, "loss": 0.8345, "step": 4756 }, { "epoch": 1.3122758620689656, "grad_norm": 4.3022565841674805, "learning_rate": 8.816242579549916e-06, "loss": 0.868, "step": 4757 }, { "epoch": 1.312551724137931, "grad_norm": 4.002386569976807, "learning_rate": 8.815653075095182e-06, "loss": 0.9151, "step": 4758 }, { "epoch": 1.3128275862068965, "grad_norm": 3.865762233734131, "learning_rate": 8.81506344361135e-06, "loss": 0.8125, "step": 4759 }, { "epoch": 1.3131034482758621, "grad_norm": 3.9845738410949707, "learning_rate": 8.814473685118047e-06, "loss": 0.7985, "step": 4760 }, { "epoch": 1.3133793103448275, "grad_norm": 3.935837745666504, "learning_rate": 8.813883799634909e-06, "loss": 0.7502, "step": 4761 }, { "epoch": 1.313655172413793, "grad_norm": 3.8387904167175293, "learning_rate": 8.813293787181574e-06, "loss": 0.7382, "step": 4762 }, { "epoch": 1.3139310344827586, "grad_norm": 4.227972507476807, "learning_rate": 8.812703647777684e-06, "loss": 0.7617, "step": 4763 }, { "epoch": 1.3142068965517242, "grad_norm": 4.287699222564697, "learning_rate": 8.812113381442886e-06, "loss": 0.8405, "step": 4764 }, { "epoch": 1.3144827586206898, "grad_norm": 4.395445346832275, "learning_rate": 8.81152298819683e-06, "loss": 0.8567, "step": 4765 }, { "epoch": 1.314758620689655, "grad_norm": 3.7644741535186768, "learning_rate": 8.810932468059173e-06, "loss": 0.8039, "step": 4766 }, { "epoch": 1.3150344827586207, "grad_norm": 3.867738723754883, "learning_rate": 8.810341821049573e-06, "loss": 0.8324, "step": 4767 }, { "epoch": 1.3153103448275862, "grad_norm": 3.571425676345825, "learning_rate": 8.809751047187693e-06, "loss": 0.7694, "step": 4768 }, { "epoch": 1.3155862068965516, "grad_norm": 3.8547043800354004, "learning_rate": 8.809160146493202e-06, "loss": 0.8187, "step": 4769 }, { "epoch": 1.3158620689655172, "grad_norm": 3.7523720264434814, "learning_rate": 8.808569118985772e-06, "loss": 0.7498, "step": 4770 }, { "epoch": 1.3161379310344827, "grad_norm": 4.221981525421143, "learning_rate": 8.80797796468508e-06, "loss": 0.7704, "step": 4771 }, { "epoch": 1.3164137931034483, "grad_norm": 4.5572829246521, "learning_rate": 8.807386683610803e-06, "loss": 0.8332, "step": 4772 }, { "epoch": 1.3166896551724139, "grad_norm": 4.16621208190918, "learning_rate": 8.80679527578263e-06, "loss": 0.9475, "step": 4773 }, { "epoch": 1.3169655172413792, "grad_norm": 4.001545429229736, "learning_rate": 8.806203741220247e-06, "loss": 0.792, "step": 4774 }, { "epoch": 1.3172413793103448, "grad_norm": 4.408848762512207, "learning_rate": 8.805612079943348e-06, "loss": 0.9306, "step": 4775 }, { "epoch": 1.3175172413793104, "grad_norm": 4.005407810211182, "learning_rate": 8.80502029197163e-06, "loss": 0.7511, "step": 4776 }, { "epoch": 1.317793103448276, "grad_norm": 4.393120288848877, "learning_rate": 8.804428377324795e-06, "loss": 0.9218, "step": 4777 }, { "epoch": 1.3180689655172415, "grad_norm": 4.010434150695801, "learning_rate": 8.803836336022548e-06, "loss": 0.7312, "step": 4778 }, { "epoch": 1.3183448275862069, "grad_norm": 3.414024591445923, "learning_rate": 8.8032441680846e-06, "loss": 0.7334, "step": 4779 }, { "epoch": 1.3186206896551724, "grad_norm": 3.8483123779296875, "learning_rate": 8.802651873530668e-06, "loss": 0.7054, "step": 4780 }, { "epoch": 1.318896551724138, "grad_norm": 3.937018632888794, "learning_rate": 8.802059452380464e-06, "loss": 0.8911, "step": 4781 }, { "epoch": 1.3191724137931033, "grad_norm": 3.9425861835479736, "learning_rate": 8.801466904653714e-06, "loss": 0.7156, "step": 4782 }, { "epoch": 1.319448275862069, "grad_norm": 3.698223114013672, "learning_rate": 8.800874230370147e-06, "loss": 0.7893, "step": 4783 }, { "epoch": 1.3197241379310345, "grad_norm": 3.9347152709960938, "learning_rate": 8.80028142954949e-06, "loss": 0.7112, "step": 4784 }, { "epoch": 1.32, "grad_norm": 4.047330379486084, "learning_rate": 8.79968850221148e-06, "loss": 0.7994, "step": 4785 }, { "epoch": 1.3202758620689656, "grad_norm": 3.6211979389190674, "learning_rate": 8.799095448375855e-06, "loss": 0.7628, "step": 4786 }, { "epoch": 1.320551724137931, "grad_norm": 4.48220157623291, "learning_rate": 8.798502268062364e-06, "loss": 0.8765, "step": 4787 }, { "epoch": 1.3208275862068966, "grad_norm": 4.199865341186523, "learning_rate": 8.797908961290749e-06, "loss": 0.663, "step": 4788 }, { "epoch": 1.3211034482758621, "grad_norm": 4.079330921173096, "learning_rate": 8.797315528080765e-06, "loss": 0.8842, "step": 4789 }, { "epoch": 1.3213793103448275, "grad_norm": 4.157276153564453, "learning_rate": 8.796721968452168e-06, "loss": 0.7445, "step": 4790 }, { "epoch": 1.321655172413793, "grad_norm": 3.648688316345215, "learning_rate": 8.796128282424717e-06, "loss": 0.7837, "step": 4791 }, { "epoch": 1.3219310344827586, "grad_norm": 4.282891273498535, "learning_rate": 8.79553447001818e-06, "loss": 0.9627, "step": 4792 }, { "epoch": 1.3222068965517242, "grad_norm": 4.416942596435547, "learning_rate": 8.794940531252321e-06, "loss": 0.8259, "step": 4793 }, { "epoch": 1.3224827586206898, "grad_norm": 3.8875246047973633, "learning_rate": 8.794346466146917e-06, "loss": 0.6922, "step": 4794 }, { "epoch": 1.322758620689655, "grad_norm": 3.611337184906006, "learning_rate": 8.793752274721746e-06, "loss": 0.7393, "step": 4795 }, { "epoch": 1.3230344827586207, "grad_norm": 3.9439046382904053, "learning_rate": 8.793157956996585e-06, "loss": 0.7595, "step": 4796 }, { "epoch": 1.3233103448275862, "grad_norm": 3.7108240127563477, "learning_rate": 8.792563512991225e-06, "loss": 0.8721, "step": 4797 }, { "epoch": 1.3235862068965516, "grad_norm": 4.035660743713379, "learning_rate": 8.791968942725453e-06, "loss": 0.7946, "step": 4798 }, { "epoch": 1.3238620689655172, "grad_norm": 4.062170028686523, "learning_rate": 8.791374246219065e-06, "loss": 0.8714, "step": 4799 }, { "epoch": 1.3241379310344827, "grad_norm": 4.1097846031188965, "learning_rate": 8.790779423491857e-06, "loss": 0.8834, "step": 4800 }, { "epoch": 1.3244137931034483, "grad_norm": 4.111985206604004, "learning_rate": 8.790184474563633e-06, "loss": 0.874, "step": 4801 }, { "epoch": 1.3246896551724139, "grad_norm": 4.03842830657959, "learning_rate": 8.789589399454198e-06, "loss": 0.8454, "step": 4802 }, { "epoch": 1.3249655172413792, "grad_norm": 4.1613969802856445, "learning_rate": 8.788994198183367e-06, "loss": 0.9243, "step": 4803 }, { "epoch": 1.3252413793103448, "grad_norm": 3.864766836166382, "learning_rate": 8.788398870770954e-06, "loss": 0.7594, "step": 4804 }, { "epoch": 1.3255172413793104, "grad_norm": 3.9495325088500977, "learning_rate": 8.787803417236774e-06, "loss": 0.9289, "step": 4805 }, { "epoch": 1.325793103448276, "grad_norm": 4.061553955078125, "learning_rate": 8.787207837600655e-06, "loss": 0.7993, "step": 4806 }, { "epoch": 1.3260689655172415, "grad_norm": 3.4678173065185547, "learning_rate": 8.786612131882424e-06, "loss": 0.7295, "step": 4807 }, { "epoch": 1.3263448275862069, "grad_norm": 3.729607582092285, "learning_rate": 8.786016300101912e-06, "loss": 0.8314, "step": 4808 }, { "epoch": 1.3266206896551724, "grad_norm": 3.757863759994507, "learning_rate": 8.785420342278957e-06, "loss": 0.8764, "step": 4809 }, { "epoch": 1.326896551724138, "grad_norm": 3.608367681503296, "learning_rate": 8.784824258433396e-06, "loss": 0.7436, "step": 4810 }, { "epoch": 1.3271724137931034, "grad_norm": 3.410202980041504, "learning_rate": 8.784228048585078e-06, "loss": 0.7504, "step": 4811 }, { "epoch": 1.327448275862069, "grad_norm": 4.310059547424316, "learning_rate": 8.783631712753846e-06, "loss": 0.7637, "step": 4812 }, { "epoch": 1.3277241379310345, "grad_norm": 4.027735710144043, "learning_rate": 8.78303525095956e-06, "loss": 0.9158, "step": 4813 }, { "epoch": 1.328, "grad_norm": 4.298893928527832, "learning_rate": 8.782438663222071e-06, "loss": 0.8112, "step": 4814 }, { "epoch": 1.3282758620689656, "grad_norm": 3.9108693599700928, "learning_rate": 8.781841949561246e-06, "loss": 0.8411, "step": 4815 }, { "epoch": 1.328551724137931, "grad_norm": 4.138310432434082, "learning_rate": 8.781245109996943e-06, "loss": 0.7635, "step": 4816 }, { "epoch": 1.3288275862068966, "grad_norm": 4.100400924682617, "learning_rate": 8.78064814454904e-06, "loss": 0.8191, "step": 4817 }, { "epoch": 1.3291034482758621, "grad_norm": 3.841464042663574, "learning_rate": 8.780051053237403e-06, "loss": 0.6416, "step": 4818 }, { "epoch": 1.3293793103448275, "grad_norm": 3.8842763900756836, "learning_rate": 8.779453836081918e-06, "loss": 0.8338, "step": 4819 }, { "epoch": 1.329655172413793, "grad_norm": 4.175421237945557, "learning_rate": 8.778856493102463e-06, "loss": 0.7905, "step": 4820 }, { "epoch": 1.3299310344827586, "grad_norm": 3.7598659992218018, "learning_rate": 8.778259024318926e-06, "loss": 0.7782, "step": 4821 }, { "epoch": 1.3302068965517242, "grad_norm": 3.9117307662963867, "learning_rate": 8.777661429751192e-06, "loss": 0.7826, "step": 4822 }, { "epoch": 1.3304827586206898, "grad_norm": 3.890913248062134, "learning_rate": 8.777063709419165e-06, "loss": 0.8127, "step": 4823 }, { "epoch": 1.3307586206896551, "grad_norm": 3.587486505508423, "learning_rate": 8.776465863342737e-06, "loss": 0.7246, "step": 4824 }, { "epoch": 1.3310344827586207, "grad_norm": 3.5807669162750244, "learning_rate": 8.775867891541815e-06, "loss": 0.7909, "step": 4825 }, { "epoch": 1.3313103448275863, "grad_norm": 3.956416368484497, "learning_rate": 8.775269794036306e-06, "loss": 0.7709, "step": 4826 }, { "epoch": 1.3315862068965516, "grad_norm": 3.962993621826172, "learning_rate": 8.774671570846118e-06, "loss": 0.8366, "step": 4827 }, { "epoch": 1.3318620689655172, "grad_norm": 3.9875404834747314, "learning_rate": 8.774073221991171e-06, "loss": 0.7724, "step": 4828 }, { "epoch": 1.3321379310344827, "grad_norm": 3.518254280090332, "learning_rate": 8.773474747491386e-06, "loss": 0.7214, "step": 4829 }, { "epoch": 1.3324137931034483, "grad_norm": 3.9731669425964355, "learning_rate": 8.772876147366682e-06, "loss": 0.742, "step": 4830 }, { "epoch": 1.332689655172414, "grad_norm": 4.224135875701904, "learning_rate": 8.77227742163699e-06, "loss": 0.7709, "step": 4831 }, { "epoch": 1.3329655172413792, "grad_norm": 3.5466058254241943, "learning_rate": 8.771678570322245e-06, "loss": 0.7291, "step": 4832 }, { "epoch": 1.3332413793103448, "grad_norm": 3.9463162422180176, "learning_rate": 8.771079593442378e-06, "loss": 0.7782, "step": 4833 }, { "epoch": 1.3335172413793104, "grad_norm": 4.236322402954102, "learning_rate": 8.770480491017336e-06, "loss": 0.8905, "step": 4834 }, { "epoch": 1.3337931034482757, "grad_norm": 3.944694757461548, "learning_rate": 8.769881263067061e-06, "loss": 0.6488, "step": 4835 }, { "epoch": 1.3340689655172413, "grad_norm": 3.8713555335998535, "learning_rate": 8.769281909611501e-06, "loss": 0.8237, "step": 4836 }, { "epoch": 1.3343448275862069, "grad_norm": 3.7568912506103516, "learning_rate": 8.768682430670613e-06, "loss": 0.7524, "step": 4837 }, { "epoch": 1.3346206896551724, "grad_norm": 3.8284225463867188, "learning_rate": 8.76808282626435e-06, "loss": 0.7851, "step": 4838 }, { "epoch": 1.334896551724138, "grad_norm": 3.578835964202881, "learning_rate": 8.767483096412677e-06, "loss": 0.6496, "step": 4839 }, { "epoch": 1.3351724137931034, "grad_norm": 4.109687328338623, "learning_rate": 8.766883241135561e-06, "loss": 0.8537, "step": 4840 }, { "epoch": 1.335448275862069, "grad_norm": 3.9458768367767334, "learning_rate": 8.766283260452968e-06, "loss": 0.8756, "step": 4841 }, { "epoch": 1.3357241379310345, "grad_norm": 4.2613301277160645, "learning_rate": 8.765683154384877e-06, "loss": 0.8202, "step": 4842 }, { "epoch": 1.336, "grad_norm": 4.300932884216309, "learning_rate": 8.765082922951263e-06, "loss": 1.0107, "step": 4843 }, { "epoch": 1.3362758620689656, "grad_norm": 3.7925353050231934, "learning_rate": 8.76448256617211e-06, "loss": 0.7533, "step": 4844 }, { "epoch": 1.336551724137931, "grad_norm": 3.8814098834991455, "learning_rate": 8.763882084067405e-06, "loss": 0.6996, "step": 4845 }, { "epoch": 1.3368275862068966, "grad_norm": 4.035908222198486, "learning_rate": 8.763281476657138e-06, "loss": 0.7907, "step": 4846 }, { "epoch": 1.3371034482758621, "grad_norm": 3.486100673675537, "learning_rate": 8.762680743961305e-06, "loss": 0.8094, "step": 4847 }, { "epoch": 1.3373793103448275, "grad_norm": 4.554157257080078, "learning_rate": 8.762079885999905e-06, "loss": 0.959, "step": 4848 }, { "epoch": 1.337655172413793, "grad_norm": 3.7023184299468994, "learning_rate": 8.761478902792942e-06, "loss": 0.7879, "step": 4849 }, { "epoch": 1.3379310344827586, "grad_norm": 4.2986321449279785, "learning_rate": 8.760877794360422e-06, "loss": 0.8283, "step": 4850 }, { "epoch": 1.3382068965517242, "grad_norm": 3.560688018798828, "learning_rate": 8.760276560722358e-06, "loss": 0.7255, "step": 4851 }, { "epoch": 1.3384827586206898, "grad_norm": 3.6003012657165527, "learning_rate": 8.759675201898767e-06, "loss": 0.7644, "step": 4852 }, { "epoch": 1.3387586206896551, "grad_norm": 3.869324207305908, "learning_rate": 8.75907371790967e-06, "loss": 0.7618, "step": 4853 }, { "epoch": 1.3390344827586207, "grad_norm": 4.439943313598633, "learning_rate": 8.75847210877509e-06, "loss": 0.7981, "step": 4854 }, { "epoch": 1.3393103448275863, "grad_norm": 3.6938629150390625, "learning_rate": 8.757870374515051e-06, "loss": 0.807, "step": 4855 }, { "epoch": 1.3395862068965516, "grad_norm": 3.989760160446167, "learning_rate": 8.757268515149593e-06, "loss": 0.7736, "step": 4856 }, { "epoch": 1.3398620689655172, "grad_norm": 3.904780387878418, "learning_rate": 8.75666653069875e-06, "loss": 0.7358, "step": 4857 }, { "epoch": 1.3401379310344828, "grad_norm": 3.996553659439087, "learning_rate": 8.756064421182561e-06, "loss": 0.8193, "step": 4858 }, { "epoch": 1.3404137931034483, "grad_norm": 3.8466107845306396, "learning_rate": 8.755462186621076e-06, "loss": 0.7553, "step": 4859 }, { "epoch": 1.340689655172414, "grad_norm": 3.8667593002319336, "learning_rate": 8.75485982703434e-06, "loss": 0.8128, "step": 4860 }, { "epoch": 1.3409655172413792, "grad_norm": 3.881594657897949, "learning_rate": 8.754257342442404e-06, "loss": 0.7922, "step": 4861 }, { "epoch": 1.3412413793103448, "grad_norm": 4.287991046905518, "learning_rate": 8.753654732865333e-06, "loss": 0.8006, "step": 4862 }, { "epoch": 1.3415172413793104, "grad_norm": 3.768427848815918, "learning_rate": 8.753051998323186e-06, "loss": 0.7399, "step": 4863 }, { "epoch": 1.3417931034482757, "grad_norm": 3.6430068016052246, "learning_rate": 8.752449138836027e-06, "loss": 0.7782, "step": 4864 }, { "epoch": 1.3420689655172413, "grad_norm": 4.134397029876709, "learning_rate": 8.751846154423926e-06, "loss": 0.7651, "step": 4865 }, { "epoch": 1.3423448275862069, "grad_norm": 4.449426651000977, "learning_rate": 8.75124304510696e-06, "loss": 0.8632, "step": 4866 }, { "epoch": 1.3426206896551725, "grad_norm": 4.148811340332031, "learning_rate": 8.750639810905207e-06, "loss": 0.6652, "step": 4867 }, { "epoch": 1.342896551724138, "grad_norm": 3.929067611694336, "learning_rate": 8.750036451838748e-06, "loss": 0.8206, "step": 4868 }, { "epoch": 1.3431724137931034, "grad_norm": 3.846433162689209, "learning_rate": 8.74943296792767e-06, "loss": 0.889, "step": 4869 }, { "epoch": 1.343448275862069, "grad_norm": 4.092860221862793, "learning_rate": 8.748829359192066e-06, "loss": 0.8497, "step": 4870 }, { "epoch": 1.3437241379310345, "grad_norm": 4.3272294998168945, "learning_rate": 8.748225625652027e-06, "loss": 0.8324, "step": 4871 }, { "epoch": 1.3439999999999999, "grad_norm": 4.342308044433594, "learning_rate": 8.747621767327657e-06, "loss": 0.8832, "step": 4872 }, { "epoch": 1.3442758620689657, "grad_norm": 4.13555383682251, "learning_rate": 8.747017784239055e-06, "loss": 0.8391, "step": 4873 }, { "epoch": 1.344551724137931, "grad_norm": 3.721726894378662, "learning_rate": 8.746413676406333e-06, "loss": 0.9047, "step": 4874 }, { "epoch": 1.3448275862068966, "grad_norm": 4.037469863891602, "learning_rate": 8.7458094438496e-06, "loss": 0.922, "step": 4875 }, { "epoch": 1.3451034482758621, "grad_norm": 3.729329824447632, "learning_rate": 8.745205086588972e-06, "loss": 0.8654, "step": 4876 }, { "epoch": 1.3453793103448275, "grad_norm": 4.040174961090088, "learning_rate": 8.744600604644569e-06, "loss": 0.8151, "step": 4877 }, { "epoch": 1.345655172413793, "grad_norm": 3.88995623588562, "learning_rate": 8.743995998036515e-06, "loss": 0.8304, "step": 4878 }, { "epoch": 1.3459310344827586, "grad_norm": 3.86293363571167, "learning_rate": 8.74339126678494e-06, "loss": 0.9155, "step": 4879 }, { "epoch": 1.3462068965517242, "grad_norm": 3.5589540004730225, "learning_rate": 8.742786410909974e-06, "loss": 0.7306, "step": 4880 }, { "epoch": 1.3464827586206898, "grad_norm": 3.564767599105835, "learning_rate": 8.742181430431756e-06, "loss": 0.6882, "step": 4881 }, { "epoch": 1.3467586206896551, "grad_norm": 3.6803996562957764, "learning_rate": 8.741576325370424e-06, "loss": 0.686, "step": 4882 }, { "epoch": 1.3470344827586207, "grad_norm": 4.1371331214904785, "learning_rate": 8.740971095746126e-06, "loss": 0.8575, "step": 4883 }, { "epoch": 1.3473103448275863, "grad_norm": 4.107537269592285, "learning_rate": 8.740365741579008e-06, "loss": 0.793, "step": 4884 }, { "epoch": 1.3475862068965516, "grad_norm": 3.3864455223083496, "learning_rate": 8.739760262889226e-06, "loss": 0.6366, "step": 4885 }, { "epoch": 1.3478620689655172, "grad_norm": 3.923081159591675, "learning_rate": 8.739154659696935e-06, "loss": 0.7919, "step": 4886 }, { "epoch": 1.3481379310344828, "grad_norm": 3.775231122970581, "learning_rate": 8.738548932022298e-06, "loss": 0.8245, "step": 4887 }, { "epoch": 1.3484137931034483, "grad_norm": 4.479916572570801, "learning_rate": 8.73794307988548e-06, "loss": 0.7965, "step": 4888 }, { "epoch": 1.348689655172414, "grad_norm": 4.2460222244262695, "learning_rate": 8.737337103306652e-06, "loss": 0.8866, "step": 4889 }, { "epoch": 1.3489655172413793, "grad_norm": 3.9971492290496826, "learning_rate": 8.736731002305987e-06, "loss": 0.7869, "step": 4890 }, { "epoch": 1.3492413793103448, "grad_norm": 3.9409449100494385, "learning_rate": 8.73612477690366e-06, "loss": 0.7133, "step": 4891 }, { "epoch": 1.3495172413793104, "grad_norm": 4.2354912757873535, "learning_rate": 8.735518427119859e-06, "loss": 0.804, "step": 4892 }, { "epoch": 1.3497931034482757, "grad_norm": 4.278940200805664, "learning_rate": 8.734911952974765e-06, "loss": 0.871, "step": 4893 }, { "epoch": 1.3500689655172413, "grad_norm": 4.10212516784668, "learning_rate": 8.734305354488573e-06, "loss": 0.8337, "step": 4894 }, { "epoch": 1.3503448275862069, "grad_norm": 3.9599218368530273, "learning_rate": 8.733698631681475e-06, "loss": 0.782, "step": 4895 }, { "epoch": 1.3506206896551725, "grad_norm": 4.0322465896606445, "learning_rate": 8.733091784573671e-06, "loss": 0.8077, "step": 4896 }, { "epoch": 1.350896551724138, "grad_norm": 4.3775529861450195, "learning_rate": 8.732484813185361e-06, "loss": 0.9958, "step": 4897 }, { "epoch": 1.3511724137931034, "grad_norm": 3.7048847675323486, "learning_rate": 8.731877717536757e-06, "loss": 0.718, "step": 4898 }, { "epoch": 1.351448275862069, "grad_norm": 3.5800294876098633, "learning_rate": 8.731270497648066e-06, "loss": 0.8357, "step": 4899 }, { "epoch": 1.3517241379310345, "grad_norm": 3.934037208557129, "learning_rate": 8.730663153539504e-06, "loss": 0.7595, "step": 4900 }, { "epoch": 1.3519999999999999, "grad_norm": 3.9101860523223877, "learning_rate": 8.73005568523129e-06, "loss": 0.7924, "step": 4901 }, { "epoch": 1.3522758620689654, "grad_norm": 4.603801727294922, "learning_rate": 8.729448092743651e-06, "loss": 0.8182, "step": 4902 }, { "epoch": 1.352551724137931, "grad_norm": 3.711812973022461, "learning_rate": 8.728840376096813e-06, "loss": 0.7961, "step": 4903 }, { "epoch": 1.3528275862068966, "grad_norm": 4.199428558349609, "learning_rate": 8.728232535311006e-06, "loss": 0.8421, "step": 4904 }, { "epoch": 1.3531034482758622, "grad_norm": 4.005481243133545, "learning_rate": 8.727624570406467e-06, "loss": 0.8395, "step": 4905 }, { "epoch": 1.3533793103448275, "grad_norm": 4.015352249145508, "learning_rate": 8.727016481403437e-06, "loss": 0.8469, "step": 4906 }, { "epoch": 1.353655172413793, "grad_norm": 3.6481730937957764, "learning_rate": 8.72640826832216e-06, "loss": 0.7852, "step": 4907 }, { "epoch": 1.3539310344827586, "grad_norm": 4.368005752563477, "learning_rate": 8.725799931182881e-06, "loss": 0.9302, "step": 4908 }, { "epoch": 1.3542068965517242, "grad_norm": 3.8021907806396484, "learning_rate": 8.725191470005858e-06, "loss": 0.8458, "step": 4909 }, { "epoch": 1.3544827586206898, "grad_norm": 3.6369476318359375, "learning_rate": 8.724582884811345e-06, "loss": 0.7031, "step": 4910 }, { "epoch": 1.3547586206896551, "grad_norm": 3.627743721008301, "learning_rate": 8.723974175619604e-06, "loss": 0.8629, "step": 4911 }, { "epoch": 1.3550344827586207, "grad_norm": 3.9282166957855225, "learning_rate": 8.723365342450895e-06, "loss": 0.8483, "step": 4912 }, { "epoch": 1.3553103448275863, "grad_norm": 4.071451663970947, "learning_rate": 8.722756385325495e-06, "loss": 0.8582, "step": 4913 }, { "epoch": 1.3555862068965516, "grad_norm": 4.218560695648193, "learning_rate": 8.72214730426367e-06, "loss": 0.8221, "step": 4914 }, { "epoch": 1.3558620689655172, "grad_norm": 4.074365615844727, "learning_rate": 8.721538099285702e-06, "loss": 0.8535, "step": 4915 }, { "epoch": 1.3561379310344828, "grad_norm": 3.8682830333709717, "learning_rate": 8.72092877041187e-06, "loss": 0.8352, "step": 4916 }, { "epoch": 1.3564137931034483, "grad_norm": 4.27147102355957, "learning_rate": 8.72031931766246e-06, "loss": 0.7384, "step": 4917 }, { "epoch": 1.356689655172414, "grad_norm": 3.958463668823242, "learning_rate": 8.719709741057762e-06, "loss": 0.813, "step": 4918 }, { "epoch": 1.3569655172413793, "grad_norm": 4.228238582611084, "learning_rate": 8.71910004061807e-06, "loss": 0.8434, "step": 4919 }, { "epoch": 1.3572413793103448, "grad_norm": 3.9185123443603516, "learning_rate": 8.71849021636368e-06, "loss": 0.6986, "step": 4920 }, { "epoch": 1.3575172413793104, "grad_norm": 4.09182596206665, "learning_rate": 8.717880268314895e-06, "loss": 0.8679, "step": 4921 }, { "epoch": 1.3577931034482758, "grad_norm": 4.327602863311768, "learning_rate": 8.717270196492023e-06, "loss": 0.8034, "step": 4922 }, { "epoch": 1.3580689655172413, "grad_norm": 3.9234251976013184, "learning_rate": 8.716660000915371e-06, "loss": 0.8302, "step": 4923 }, { "epoch": 1.358344827586207, "grad_norm": 3.6005375385284424, "learning_rate": 8.716049681605256e-06, "loss": 0.7761, "step": 4924 }, { "epoch": 1.3586206896551725, "grad_norm": 4.1266937255859375, "learning_rate": 8.715439238581994e-06, "loss": 0.8795, "step": 4925 }, { "epoch": 1.358896551724138, "grad_norm": 4.009225845336914, "learning_rate": 8.714828671865914e-06, "loss": 0.8326, "step": 4926 }, { "epoch": 1.3591724137931034, "grad_norm": 3.9909770488739014, "learning_rate": 8.714217981477334e-06, "loss": 0.841, "step": 4927 }, { "epoch": 1.359448275862069, "grad_norm": 3.668410301208496, "learning_rate": 8.71360716743659e-06, "loss": 0.8067, "step": 4928 }, { "epoch": 1.3597241379310345, "grad_norm": 3.570892572402954, "learning_rate": 8.712996229764016e-06, "loss": 0.7313, "step": 4929 }, { "epoch": 1.3599999999999999, "grad_norm": 4.234886169433594, "learning_rate": 8.71238516847995e-06, "loss": 0.8672, "step": 4930 }, { "epoch": 1.3602758620689654, "grad_norm": 3.8675339221954346, "learning_rate": 8.711773983604736e-06, "loss": 0.9478, "step": 4931 }, { "epoch": 1.360551724137931, "grad_norm": 4.095025539398193, "learning_rate": 8.711162675158722e-06, "loss": 0.7723, "step": 4932 }, { "epoch": 1.3608275862068966, "grad_norm": 3.7672224044799805, "learning_rate": 8.71055124316226e-06, "loss": 0.6832, "step": 4933 }, { "epoch": 1.3611034482758622, "grad_norm": 4.017242431640625, "learning_rate": 8.709939687635702e-06, "loss": 0.8194, "step": 4934 }, { "epoch": 1.3613793103448275, "grad_norm": 3.6085009574890137, "learning_rate": 8.70932800859941e-06, "loss": 0.7362, "step": 4935 }, { "epoch": 1.361655172413793, "grad_norm": 3.8409311771392822, "learning_rate": 8.708716206073748e-06, "loss": 0.6977, "step": 4936 }, { "epoch": 1.3619310344827587, "grad_norm": 3.802809476852417, "learning_rate": 8.708104280079086e-06, "loss": 0.9174, "step": 4937 }, { "epoch": 1.362206896551724, "grad_norm": 4.135040760040283, "learning_rate": 8.707492230635791e-06, "loss": 0.8909, "step": 4938 }, { "epoch": 1.3624827586206898, "grad_norm": 4.151402473449707, "learning_rate": 8.706880057764242e-06, "loss": 0.7673, "step": 4939 }, { "epoch": 1.3627586206896551, "grad_norm": 3.477747917175293, "learning_rate": 8.706267761484818e-06, "loss": 0.7366, "step": 4940 }, { "epoch": 1.3630344827586207, "grad_norm": 3.9934260845184326, "learning_rate": 8.705655341817907e-06, "loss": 0.921, "step": 4941 }, { "epoch": 1.3633103448275863, "grad_norm": 4.0545973777771, "learning_rate": 8.705042798783893e-06, "loss": 0.8059, "step": 4942 }, { "epoch": 1.3635862068965516, "grad_norm": 3.881474256515503, "learning_rate": 8.70443013240317e-06, "loss": 0.8749, "step": 4943 }, { "epoch": 1.3638620689655172, "grad_norm": 3.6977808475494385, "learning_rate": 8.703817342696135e-06, "loss": 0.7451, "step": 4944 }, { "epoch": 1.3641379310344828, "grad_norm": 3.891000747680664, "learning_rate": 8.703204429683188e-06, "loss": 0.7622, "step": 4945 }, { "epoch": 1.3644137931034483, "grad_norm": 4.251009941101074, "learning_rate": 8.702591393384734e-06, "loss": 0.9599, "step": 4946 }, { "epoch": 1.364689655172414, "grad_norm": 4.085383892059326, "learning_rate": 8.701978233821182e-06, "loss": 0.7565, "step": 4947 }, { "epoch": 1.3649655172413793, "grad_norm": 3.949845790863037, "learning_rate": 8.701364951012948e-06, "loss": 0.8039, "step": 4948 }, { "epoch": 1.3652413793103448, "grad_norm": 3.945739507675171, "learning_rate": 8.700751544980442e-06, "loss": 0.9505, "step": 4949 }, { "epoch": 1.3655172413793104, "grad_norm": 3.083810806274414, "learning_rate": 8.700138015744093e-06, "loss": 0.6339, "step": 4950 }, { "epoch": 1.3657931034482758, "grad_norm": 4.648484706878662, "learning_rate": 8.699524363324323e-06, "loss": 0.9349, "step": 4951 }, { "epoch": 1.3660689655172413, "grad_norm": 3.890049695968628, "learning_rate": 8.69891058774156e-06, "loss": 0.7588, "step": 4952 }, { "epoch": 1.366344827586207, "grad_norm": 3.6854324340820312, "learning_rate": 8.698296689016239e-06, "loss": 0.7427, "step": 4953 }, { "epoch": 1.3666206896551725, "grad_norm": 3.6425070762634277, "learning_rate": 8.697682667168798e-06, "loss": 0.7794, "step": 4954 }, { "epoch": 1.366896551724138, "grad_norm": 4.0993876457214355, "learning_rate": 8.697068522219679e-06, "loss": 0.7984, "step": 4955 }, { "epoch": 1.3671724137931034, "grad_norm": 4.059582233428955, "learning_rate": 8.696454254189327e-06, "loss": 0.9568, "step": 4956 }, { "epoch": 1.367448275862069, "grad_norm": 3.489938497543335, "learning_rate": 8.695839863098192e-06, "loss": 0.8217, "step": 4957 }, { "epoch": 1.3677241379310345, "grad_norm": 3.739356279373169, "learning_rate": 8.69522534896673e-06, "loss": 0.7841, "step": 4958 }, { "epoch": 1.3679999999999999, "grad_norm": 4.12481164932251, "learning_rate": 8.694610711815395e-06, "loss": 0.7658, "step": 4959 }, { "epoch": 1.3682758620689655, "grad_norm": 3.8134446144104004, "learning_rate": 8.693995951664654e-06, "loss": 0.7183, "step": 4960 }, { "epoch": 1.368551724137931, "grad_norm": 4.23809814453125, "learning_rate": 8.693381068534969e-06, "loss": 0.7534, "step": 4961 }, { "epoch": 1.3688275862068966, "grad_norm": 3.3521499633789062, "learning_rate": 8.692766062446814e-06, "loss": 0.7775, "step": 4962 }, { "epoch": 1.3691034482758622, "grad_norm": 4.234969139099121, "learning_rate": 8.69215093342066e-06, "loss": 0.8267, "step": 4963 }, { "epoch": 1.3693793103448275, "grad_norm": 3.542220115661621, "learning_rate": 8.691535681476989e-06, "loss": 0.7681, "step": 4964 }, { "epoch": 1.369655172413793, "grad_norm": 3.7598462104797363, "learning_rate": 8.69092030663628e-06, "loss": 0.7589, "step": 4965 }, { "epoch": 1.3699310344827587, "grad_norm": 4.12684440612793, "learning_rate": 8.690304808919026e-06, "loss": 0.8184, "step": 4966 }, { "epoch": 1.370206896551724, "grad_norm": 3.6953885555267334, "learning_rate": 8.689689188345712e-06, "loss": 0.7887, "step": 4967 }, { "epoch": 1.3704827586206896, "grad_norm": 3.8955881595611572, "learning_rate": 8.689073444936835e-06, "loss": 0.7668, "step": 4968 }, { "epoch": 1.3707586206896551, "grad_norm": 3.9323532581329346, "learning_rate": 8.688457578712895e-06, "loss": 0.829, "step": 4969 }, { "epoch": 1.3710344827586207, "grad_norm": 3.66694974899292, "learning_rate": 8.687841589694393e-06, "loss": 0.6274, "step": 4970 }, { "epoch": 1.3713103448275863, "grad_norm": 4.119180202484131, "learning_rate": 8.687225477901837e-06, "loss": 0.6857, "step": 4971 }, { "epoch": 1.3715862068965516, "grad_norm": 4.431857585906982, "learning_rate": 8.68660924335574e-06, "loss": 0.8543, "step": 4972 }, { "epoch": 1.3718620689655172, "grad_norm": 3.930230140686035, "learning_rate": 8.685992886076615e-06, "loss": 0.8094, "step": 4973 }, { "epoch": 1.3721379310344828, "grad_norm": 4.134672164916992, "learning_rate": 8.685376406084984e-06, "loss": 0.7774, "step": 4974 }, { "epoch": 1.3724137931034484, "grad_norm": 4.374873638153076, "learning_rate": 8.684759803401369e-06, "loss": 0.9202, "step": 4975 }, { "epoch": 1.372689655172414, "grad_norm": 3.9002745151519775, "learning_rate": 8.684143078046296e-06, "loss": 0.7551, "step": 4976 }, { "epoch": 1.3729655172413793, "grad_norm": 4.274795055389404, "learning_rate": 8.683526230040301e-06, "loss": 0.9085, "step": 4977 }, { "epoch": 1.3732413793103448, "grad_norm": 3.677938222885132, "learning_rate": 8.682909259403917e-06, "loss": 0.7223, "step": 4978 }, { "epoch": 1.3735172413793104, "grad_norm": 4.193158149719238, "learning_rate": 8.682292166157684e-06, "loss": 0.8427, "step": 4979 }, { "epoch": 1.3737931034482758, "grad_norm": 3.570761203765869, "learning_rate": 8.681674950322148e-06, "loss": 0.764, "step": 4980 }, { "epoch": 1.3740689655172413, "grad_norm": 3.9858226776123047, "learning_rate": 8.681057611917853e-06, "loss": 0.7707, "step": 4981 }, { "epoch": 1.374344827586207, "grad_norm": 4.037746429443359, "learning_rate": 8.680440150965354e-06, "loss": 0.8637, "step": 4982 }, { "epoch": 1.3746206896551725, "grad_norm": 3.4037365913391113, "learning_rate": 8.67982256748521e-06, "loss": 0.7524, "step": 4983 }, { "epoch": 1.374896551724138, "grad_norm": 4.066935062408447, "learning_rate": 8.679204861497977e-06, "loss": 0.7767, "step": 4984 }, { "epoch": 1.3751724137931034, "grad_norm": 4.112699031829834, "learning_rate": 8.678587033024218e-06, "loss": 0.8569, "step": 4985 }, { "epoch": 1.375448275862069, "grad_norm": 4.053591728210449, "learning_rate": 8.677969082084507e-06, "loss": 0.7849, "step": 4986 }, { "epoch": 1.3757241379310345, "grad_norm": 4.299979209899902, "learning_rate": 8.677351008699413e-06, "loss": 1.0197, "step": 4987 }, { "epoch": 1.376, "grad_norm": 4.223812580108643, "learning_rate": 8.676732812889514e-06, "loss": 0.9055, "step": 4988 }, { "epoch": 1.3762758620689655, "grad_norm": 3.5383968353271484, "learning_rate": 8.67611449467539e-06, "loss": 0.6557, "step": 4989 }, { "epoch": 1.376551724137931, "grad_norm": 4.577573299407959, "learning_rate": 8.675496054077623e-06, "loss": 0.8312, "step": 4990 }, { "epoch": 1.3768275862068966, "grad_norm": 3.839250087738037, "learning_rate": 8.674877491116807e-06, "loss": 0.8234, "step": 4991 }, { "epoch": 1.3771034482758622, "grad_norm": 3.8774309158325195, "learning_rate": 8.674258805813533e-06, "loss": 0.8347, "step": 4992 }, { "epoch": 1.3773793103448275, "grad_norm": 4.285971641540527, "learning_rate": 8.673639998188399e-06, "loss": 0.7076, "step": 4993 }, { "epoch": 1.377655172413793, "grad_norm": 4.0912675857543945, "learning_rate": 8.673021068262001e-06, "loss": 0.8678, "step": 4994 }, { "epoch": 1.3779310344827587, "grad_norm": 4.094265937805176, "learning_rate": 8.672402016054951e-06, "loss": 0.8706, "step": 4995 }, { "epoch": 1.378206896551724, "grad_norm": 3.883307456970215, "learning_rate": 8.671782841587855e-06, "loss": 0.8077, "step": 4996 }, { "epoch": 1.3784827586206896, "grad_norm": 4.07222843170166, "learning_rate": 8.671163544881324e-06, "loss": 0.8308, "step": 4997 }, { "epoch": 1.3787586206896552, "grad_norm": 4.126522064208984, "learning_rate": 8.67054412595598e-06, "loss": 0.7492, "step": 4998 }, { "epoch": 1.3790344827586207, "grad_norm": 3.7525885105133057, "learning_rate": 8.669924584832442e-06, "loss": 0.8606, "step": 4999 }, { "epoch": 1.3793103448275863, "grad_norm": 3.872603416442871, "learning_rate": 8.669304921531336e-06, "loss": 0.7834, "step": 5000 }, { "epoch": 1.3793103448275863, "eval_loss": 1.2801851034164429, "eval_runtime": 13.6563, "eval_samples_per_second": 29.29, "eval_steps_per_second": 3.661, "step": 5000 }, { "epoch": 1.3795862068965516, "grad_norm": 3.6627681255340576, "learning_rate": 8.66868513607329e-06, "loss": 0.7805, "step": 5001 }, { "epoch": 1.3798620689655172, "grad_norm": 4.0478925704956055, "learning_rate": 8.668065228478938e-06, "loss": 0.8031, "step": 5002 }, { "epoch": 1.3801379310344828, "grad_norm": 3.9824345111846924, "learning_rate": 8.667445198768923e-06, "loss": 0.8347, "step": 5003 }, { "epoch": 1.3804137931034481, "grad_norm": 4.718106269836426, "learning_rate": 8.666825046963879e-06, "loss": 1.0284, "step": 5004 }, { "epoch": 1.3806896551724137, "grad_norm": 4.18598747253418, "learning_rate": 8.666204773084455e-06, "loss": 0.7999, "step": 5005 }, { "epoch": 1.3809655172413793, "grad_norm": 3.721890926361084, "learning_rate": 8.665584377151301e-06, "loss": 0.9109, "step": 5006 }, { "epoch": 1.3812413793103449, "grad_norm": 3.851870059967041, "learning_rate": 8.664963859185074e-06, "loss": 0.7473, "step": 5007 }, { "epoch": 1.3815172413793104, "grad_norm": 4.000605583190918, "learning_rate": 8.664343219206427e-06, "loss": 0.7626, "step": 5008 }, { "epoch": 1.3817931034482758, "grad_norm": 3.818577527999878, "learning_rate": 8.663722457236023e-06, "loss": 0.7348, "step": 5009 }, { "epoch": 1.3820689655172413, "grad_norm": 3.564828872680664, "learning_rate": 8.663101573294531e-06, "loss": 0.8145, "step": 5010 }, { "epoch": 1.382344827586207, "grad_norm": 4.374670028686523, "learning_rate": 8.662480567402618e-06, "loss": 0.9714, "step": 5011 }, { "epoch": 1.3826206896551725, "grad_norm": 3.5130765438079834, "learning_rate": 8.661859439580962e-06, "loss": 0.7419, "step": 5012 }, { "epoch": 1.382896551724138, "grad_norm": 3.769798994064331, "learning_rate": 8.661238189850238e-06, "loss": 0.7261, "step": 5013 }, { "epoch": 1.3831724137931034, "grad_norm": 4.000157356262207, "learning_rate": 8.66061681823113e-06, "loss": 0.7762, "step": 5014 }, { "epoch": 1.383448275862069, "grad_norm": 3.748654842376709, "learning_rate": 8.659995324744324e-06, "loss": 0.8661, "step": 5015 }, { "epoch": 1.3837241379310345, "grad_norm": 4.097286224365234, "learning_rate": 8.65937370941051e-06, "loss": 0.7519, "step": 5016 }, { "epoch": 1.384, "grad_norm": 3.815864086151123, "learning_rate": 8.658751972250384e-06, "loss": 0.7903, "step": 5017 }, { "epoch": 1.3842758620689655, "grad_norm": 3.8360047340393066, "learning_rate": 8.65813011328464e-06, "loss": 0.7563, "step": 5018 }, { "epoch": 1.384551724137931, "grad_norm": 4.098167896270752, "learning_rate": 8.657508132533986e-06, "loss": 0.8096, "step": 5019 }, { "epoch": 1.3848275862068966, "grad_norm": 4.089303970336914, "learning_rate": 8.656886030019129e-06, "loss": 0.9182, "step": 5020 }, { "epoch": 1.3851034482758622, "grad_norm": 3.794959545135498, "learning_rate": 8.656263805760777e-06, "loss": 0.8104, "step": 5021 }, { "epoch": 1.3853793103448275, "grad_norm": 4.508564472198486, "learning_rate": 8.655641459779644e-06, "loss": 0.9396, "step": 5022 }, { "epoch": 1.385655172413793, "grad_norm": 3.9271249771118164, "learning_rate": 8.65501899209645e-06, "loss": 0.7759, "step": 5023 }, { "epoch": 1.3859310344827587, "grad_norm": 3.6054742336273193, "learning_rate": 8.654396402731922e-06, "loss": 0.8226, "step": 5024 }, { "epoch": 1.386206896551724, "grad_norm": 4.003469467163086, "learning_rate": 8.65377369170678e-06, "loss": 0.8321, "step": 5025 }, { "epoch": 1.3864827586206896, "grad_norm": 3.815140962600708, "learning_rate": 8.65315085904176e-06, "loss": 0.686, "step": 5026 }, { "epoch": 1.3867586206896552, "grad_norm": 4.091405868530273, "learning_rate": 8.652527904757597e-06, "loss": 0.9144, "step": 5027 }, { "epoch": 1.3870344827586207, "grad_norm": 4.480497360229492, "learning_rate": 8.651904828875026e-06, "loss": 0.8408, "step": 5028 }, { "epoch": 1.3873103448275863, "grad_norm": 3.8941690921783447, "learning_rate": 8.651281631414792e-06, "loss": 0.8921, "step": 5029 }, { "epoch": 1.3875862068965517, "grad_norm": 4.21024227142334, "learning_rate": 8.650658312397647e-06, "loss": 0.6898, "step": 5030 }, { "epoch": 1.3878620689655172, "grad_norm": 3.8912575244903564, "learning_rate": 8.650034871844334e-06, "loss": 0.7965, "step": 5031 }, { "epoch": 1.3881379310344828, "grad_norm": 4.162256240844727, "learning_rate": 8.649411309775614e-06, "loss": 0.8686, "step": 5032 }, { "epoch": 1.3884137931034481, "grad_norm": 3.884676456451416, "learning_rate": 8.648787626212246e-06, "loss": 0.8248, "step": 5033 }, { "epoch": 1.3886896551724137, "grad_norm": 3.8826189041137695, "learning_rate": 8.648163821174992e-06, "loss": 0.772, "step": 5034 }, { "epoch": 1.3889655172413793, "grad_norm": 4.314073085784912, "learning_rate": 8.64753989468462e-06, "loss": 0.8611, "step": 5035 }, { "epoch": 1.3892413793103449, "grad_norm": 3.8100478649139404, "learning_rate": 8.646915846761901e-06, "loss": 0.9019, "step": 5036 }, { "epoch": 1.3895172413793104, "grad_norm": 4.060506343841553, "learning_rate": 8.64629167742761e-06, "loss": 0.841, "step": 5037 }, { "epoch": 1.3897931034482758, "grad_norm": 4.098781585693359, "learning_rate": 8.645667386702528e-06, "loss": 0.9278, "step": 5038 }, { "epoch": 1.3900689655172414, "grad_norm": 4.178236961364746, "learning_rate": 8.645042974607438e-06, "loss": 0.8096, "step": 5039 }, { "epoch": 1.390344827586207, "grad_norm": 3.7184696197509766, "learning_rate": 8.644418441163127e-06, "loss": 0.7589, "step": 5040 }, { "epoch": 1.3906206896551725, "grad_norm": 4.188239097595215, "learning_rate": 8.643793786390388e-06, "loss": 0.7652, "step": 5041 }, { "epoch": 1.390896551724138, "grad_norm": 4.107025146484375, "learning_rate": 8.643169010310018e-06, "loss": 0.8188, "step": 5042 }, { "epoch": 1.3911724137931034, "grad_norm": 4.273369789123535, "learning_rate": 8.642544112942812e-06, "loss": 0.8992, "step": 5043 }, { "epoch": 1.391448275862069, "grad_norm": 3.9508278369903564, "learning_rate": 8.641919094309578e-06, "loss": 0.8929, "step": 5044 }, { "epoch": 1.3917241379310346, "grad_norm": 4.038426399230957, "learning_rate": 8.641293954431123e-06, "loss": 0.8926, "step": 5045 }, { "epoch": 1.392, "grad_norm": 4.7064056396484375, "learning_rate": 8.640668693328257e-06, "loss": 0.7801, "step": 5046 }, { "epoch": 1.3922758620689655, "grad_norm": 4.323269844055176, "learning_rate": 8.640043311021799e-06, "loss": 0.9108, "step": 5047 }, { "epoch": 1.392551724137931, "grad_norm": 3.6516776084899902, "learning_rate": 8.639417807532569e-06, "loss": 0.8835, "step": 5048 }, { "epoch": 1.3928275862068966, "grad_norm": 4.35417366027832, "learning_rate": 8.638792182881385e-06, "loss": 0.9212, "step": 5049 }, { "epoch": 1.3931034482758622, "grad_norm": 3.8384828567504883, "learning_rate": 8.638166437089084e-06, "loss": 0.7918, "step": 5050 }, { "epoch": 1.3933793103448275, "grad_norm": 3.9808197021484375, "learning_rate": 8.63754057017649e-06, "loss": 0.7716, "step": 5051 }, { "epoch": 1.393655172413793, "grad_norm": 3.9221291542053223, "learning_rate": 8.636914582164444e-06, "loss": 0.8398, "step": 5052 }, { "epoch": 1.3939310344827587, "grad_norm": 4.2337117195129395, "learning_rate": 8.636288473073786e-06, "loss": 0.9774, "step": 5053 }, { "epoch": 1.394206896551724, "grad_norm": 4.010715484619141, "learning_rate": 8.635662242925355e-06, "loss": 0.7182, "step": 5054 }, { "epoch": 1.3944827586206896, "grad_norm": 3.584815740585327, "learning_rate": 8.635035891740007e-06, "loss": 0.7719, "step": 5055 }, { "epoch": 1.3947586206896552, "grad_norm": 3.7055892944335938, "learning_rate": 8.634409419538588e-06, "loss": 0.7727, "step": 5056 }, { "epoch": 1.3950344827586207, "grad_norm": 3.8136298656463623, "learning_rate": 8.633782826341958e-06, "loss": 0.7512, "step": 5057 }, { "epoch": 1.3953103448275863, "grad_norm": 3.89814829826355, "learning_rate": 8.633156112170974e-06, "loss": 0.8269, "step": 5058 }, { "epoch": 1.3955862068965517, "grad_norm": 4.365345001220703, "learning_rate": 8.632529277046505e-06, "loss": 0.9473, "step": 5059 }, { "epoch": 1.3958620689655172, "grad_norm": 3.769930601119995, "learning_rate": 8.631902320989412e-06, "loss": 0.7607, "step": 5060 }, { "epoch": 1.3961379310344828, "grad_norm": 3.9215753078460693, "learning_rate": 8.631275244020575e-06, "loss": 0.7507, "step": 5061 }, { "epoch": 1.3964137931034482, "grad_norm": 4.5705437660217285, "learning_rate": 8.630648046160866e-06, "loss": 0.7221, "step": 5062 }, { "epoch": 1.3966896551724137, "grad_norm": 3.7655880451202393, "learning_rate": 8.630020727431168e-06, "loss": 0.7383, "step": 5063 }, { "epoch": 1.3969655172413793, "grad_norm": 4.1364617347717285, "learning_rate": 8.629393287852363e-06, "loss": 0.8331, "step": 5064 }, { "epoch": 1.3972413793103449, "grad_norm": 3.741260051727295, "learning_rate": 8.62876572744534e-06, "loss": 0.7843, "step": 5065 }, { "epoch": 1.3975172413793104, "grad_norm": 3.837181568145752, "learning_rate": 8.628138046230992e-06, "loss": 0.8415, "step": 5066 }, { "epoch": 1.3977931034482758, "grad_norm": 3.48117733001709, "learning_rate": 8.627510244230215e-06, "loss": 0.7453, "step": 5067 }, { "epoch": 1.3980689655172414, "grad_norm": 4.0341620445251465, "learning_rate": 8.62688232146391e-06, "loss": 0.8695, "step": 5068 }, { "epoch": 1.398344827586207, "grad_norm": 3.6884450912475586, "learning_rate": 8.626254277952984e-06, "loss": 0.745, "step": 5069 }, { "epoch": 1.3986206896551723, "grad_norm": 4.22247314453125, "learning_rate": 8.62562611371834e-06, "loss": 0.7667, "step": 5070 }, { "epoch": 1.3988965517241378, "grad_norm": 3.9854531288146973, "learning_rate": 8.624997828780895e-06, "loss": 0.8953, "step": 5071 }, { "epoch": 1.3991724137931034, "grad_norm": 3.5636532306671143, "learning_rate": 8.624369423161562e-06, "loss": 0.6643, "step": 5072 }, { "epoch": 1.399448275862069, "grad_norm": 4.13313102722168, "learning_rate": 8.623740896881268e-06, "loss": 0.8271, "step": 5073 }, { "epoch": 1.3997241379310346, "grad_norm": 3.8396482467651367, "learning_rate": 8.62311224996093e-06, "loss": 0.8148, "step": 5074 }, { "epoch": 1.4, "grad_norm": 3.5860660076141357, "learning_rate": 8.62248348242148e-06, "loss": 0.7684, "step": 5075 }, { "epoch": 1.4002758620689655, "grad_norm": 3.9477930068969727, "learning_rate": 8.621854594283852e-06, "loss": 0.8698, "step": 5076 }, { "epoch": 1.400551724137931, "grad_norm": 3.733555793762207, "learning_rate": 8.621225585568982e-06, "loss": 0.7049, "step": 5077 }, { "epoch": 1.4008275862068966, "grad_norm": 3.8429431915283203, "learning_rate": 8.620596456297808e-06, "loss": 0.7151, "step": 5078 }, { "epoch": 1.4011034482758622, "grad_norm": 3.7627604007720947, "learning_rate": 8.619967206491278e-06, "loss": 0.7762, "step": 5079 }, { "epoch": 1.4013793103448275, "grad_norm": 3.764509439468384, "learning_rate": 8.619337836170341e-06, "loss": 0.8005, "step": 5080 }, { "epoch": 1.4016551724137931, "grad_norm": 4.095116138458252, "learning_rate": 8.618708345355946e-06, "loss": 0.861, "step": 5081 }, { "epoch": 1.4019310344827587, "grad_norm": 4.092885971069336, "learning_rate": 8.618078734069053e-06, "loss": 0.7513, "step": 5082 }, { "epoch": 1.402206896551724, "grad_norm": 4.133827209472656, "learning_rate": 8.617449002330622e-06, "loss": 0.81, "step": 5083 }, { "epoch": 1.4024827586206896, "grad_norm": 3.7852814197540283, "learning_rate": 8.616819150161617e-06, "loss": 0.817, "step": 5084 }, { "epoch": 1.4027586206896552, "grad_norm": 4.474603176116943, "learning_rate": 8.616189177583006e-06, "loss": 0.9426, "step": 5085 }, { "epoch": 1.4030344827586207, "grad_norm": 3.524475574493408, "learning_rate": 8.615559084615766e-06, "loss": 0.7314, "step": 5086 }, { "epoch": 1.4033103448275863, "grad_norm": 4.093081951141357, "learning_rate": 8.614928871280867e-06, "loss": 0.8401, "step": 5087 }, { "epoch": 1.4035862068965517, "grad_norm": 4.070157527923584, "learning_rate": 8.614298537599297e-06, "loss": 0.7801, "step": 5088 }, { "epoch": 1.4038620689655172, "grad_norm": 4.129349708557129, "learning_rate": 8.613668083592036e-06, "loss": 0.8457, "step": 5089 }, { "epoch": 1.4041379310344828, "grad_norm": 4.382498264312744, "learning_rate": 8.613037509280074e-06, "loss": 0.7956, "step": 5090 }, { "epoch": 1.4044137931034482, "grad_norm": 3.97310471534729, "learning_rate": 8.612406814684405e-06, "loss": 0.7868, "step": 5091 }, { "epoch": 1.4046896551724137, "grad_norm": 4.036679267883301, "learning_rate": 8.611775999826025e-06, "loss": 0.8358, "step": 5092 }, { "epoch": 1.4049655172413793, "grad_norm": 3.641428232192993, "learning_rate": 8.611145064725934e-06, "loss": 0.7235, "step": 5093 }, { "epoch": 1.4052413793103449, "grad_norm": 3.281181812286377, "learning_rate": 8.610514009405137e-06, "loss": 0.6909, "step": 5094 }, { "epoch": 1.4055172413793104, "grad_norm": 3.979309320449829, "learning_rate": 8.609882833884644e-06, "loss": 0.7088, "step": 5095 }, { "epoch": 1.4057931034482758, "grad_norm": 3.680868148803711, "learning_rate": 8.609251538185468e-06, "loss": 0.7583, "step": 5096 }, { "epoch": 1.4060689655172414, "grad_norm": 3.8334758281707764, "learning_rate": 8.608620122328624e-06, "loss": 0.7512, "step": 5097 }, { "epoch": 1.406344827586207, "grad_norm": 4.037400722503662, "learning_rate": 8.607988586335132e-06, "loss": 0.8625, "step": 5098 }, { "epoch": 1.4066206896551723, "grad_norm": 4.0006866455078125, "learning_rate": 8.607356930226022e-06, "loss": 0.7746, "step": 5099 }, { "epoch": 1.4068965517241379, "grad_norm": 4.240856647491455, "learning_rate": 8.606725154022318e-06, "loss": 0.8737, "step": 5100 }, { "epoch": 1.4071724137931034, "grad_norm": 4.207639694213867, "learning_rate": 8.606093257745054e-06, "loss": 0.8274, "step": 5101 }, { "epoch": 1.407448275862069, "grad_norm": 4.051089286804199, "learning_rate": 8.605461241415267e-06, "loss": 0.8916, "step": 5102 }, { "epoch": 1.4077241379310346, "grad_norm": 3.482720375061035, "learning_rate": 8.604829105053999e-06, "loss": 0.7837, "step": 5103 }, { "epoch": 1.408, "grad_norm": 4.263370513916016, "learning_rate": 8.604196848682294e-06, "loss": 0.8689, "step": 5104 }, { "epoch": 1.4082758620689655, "grad_norm": 4.007618427276611, "learning_rate": 8.603564472321199e-06, "loss": 0.7567, "step": 5105 }, { "epoch": 1.408551724137931, "grad_norm": 4.050252914428711, "learning_rate": 8.602931975991769e-06, "loss": 0.8387, "step": 5106 }, { "epoch": 1.4088275862068966, "grad_norm": 3.860577344894409, "learning_rate": 8.60229935971506e-06, "loss": 0.7982, "step": 5107 }, { "epoch": 1.4091034482758622, "grad_norm": 3.7844362258911133, "learning_rate": 8.601666623512133e-06, "loss": 0.8335, "step": 5108 }, { "epoch": 1.4093793103448276, "grad_norm": 3.8871493339538574, "learning_rate": 8.601033767404052e-06, "loss": 0.735, "step": 5109 }, { "epoch": 1.4096551724137931, "grad_norm": 4.111547946929932, "learning_rate": 8.600400791411888e-06, "loss": 0.7851, "step": 5110 }, { "epoch": 1.4099310344827587, "grad_norm": 3.8530375957489014, "learning_rate": 8.599767695556712e-06, "loss": 0.7935, "step": 5111 }, { "epoch": 1.410206896551724, "grad_norm": 4.122220039367676, "learning_rate": 8.599134479859601e-06, "loss": 0.8146, "step": 5112 }, { "epoch": 1.4104827586206896, "grad_norm": 4.205873966217041, "learning_rate": 8.598501144341636e-06, "loss": 0.8153, "step": 5113 }, { "epoch": 1.4107586206896552, "grad_norm": 3.963916301727295, "learning_rate": 8.5978676890239e-06, "loss": 0.7397, "step": 5114 }, { "epoch": 1.4110344827586208, "grad_norm": 3.6657791137695312, "learning_rate": 8.597234113927486e-06, "loss": 0.7382, "step": 5115 }, { "epoch": 1.4113103448275863, "grad_norm": 3.6738808155059814, "learning_rate": 8.596600419073481e-06, "loss": 0.789, "step": 5116 }, { "epoch": 1.4115862068965517, "grad_norm": 3.8855035305023193, "learning_rate": 8.595966604482987e-06, "loss": 0.8055, "step": 5117 }, { "epoch": 1.4118620689655172, "grad_norm": 3.5625557899475098, "learning_rate": 8.595332670177101e-06, "loss": 0.8539, "step": 5118 }, { "epoch": 1.4121379310344828, "grad_norm": 4.202529430389404, "learning_rate": 8.594698616176928e-06, "loss": 0.8151, "step": 5119 }, { "epoch": 1.4124137931034482, "grad_norm": 3.7434303760528564, "learning_rate": 8.59406444250358e-06, "loss": 0.8071, "step": 5120 }, { "epoch": 1.4126896551724137, "grad_norm": 3.6968820095062256, "learning_rate": 8.593430149178165e-06, "loss": 0.877, "step": 5121 }, { "epoch": 1.4129655172413793, "grad_norm": 4.005212783813477, "learning_rate": 8.592795736221805e-06, "loss": 0.7998, "step": 5122 }, { "epoch": 1.4132413793103449, "grad_norm": 3.9694159030914307, "learning_rate": 8.592161203655615e-06, "loss": 0.7962, "step": 5123 }, { "epoch": 1.4135172413793105, "grad_norm": 3.79587721824646, "learning_rate": 8.591526551500723e-06, "loss": 0.8191, "step": 5124 }, { "epoch": 1.4137931034482758, "grad_norm": 3.978426456451416, "learning_rate": 8.590891779778257e-06, "loss": 0.7524, "step": 5125 }, { "epoch": 1.4140689655172414, "grad_norm": 4.228281021118164, "learning_rate": 8.590256888509348e-06, "loss": 0.9254, "step": 5126 }, { "epoch": 1.414344827586207, "grad_norm": 4.239936828613281, "learning_rate": 8.589621877715136e-06, "loss": 0.8745, "step": 5127 }, { "epoch": 1.4146206896551723, "grad_norm": 4.117417335510254, "learning_rate": 8.588986747416757e-06, "loss": 0.7228, "step": 5128 }, { "epoch": 1.4148965517241379, "grad_norm": 4.320788860321045, "learning_rate": 8.588351497635358e-06, "loss": 0.7815, "step": 5129 }, { "epoch": 1.4151724137931034, "grad_norm": 3.847702980041504, "learning_rate": 8.58771612839209e-06, "loss": 0.7461, "step": 5130 }, { "epoch": 1.415448275862069, "grad_norm": 3.857893466949463, "learning_rate": 8.5870806397081e-06, "loss": 0.9189, "step": 5131 }, { "epoch": 1.4157241379310346, "grad_norm": 4.181521892547607, "learning_rate": 8.586445031604547e-06, "loss": 0.7996, "step": 5132 }, { "epoch": 1.416, "grad_norm": 4.129056453704834, "learning_rate": 8.585809304102592e-06, "loss": 0.8561, "step": 5133 }, { "epoch": 1.4162758620689655, "grad_norm": 3.292056083679199, "learning_rate": 8.585173457223401e-06, "loss": 0.7379, "step": 5134 }, { "epoch": 1.416551724137931, "grad_norm": 3.9084489345550537, "learning_rate": 8.584537490988137e-06, "loss": 0.8297, "step": 5135 }, { "epoch": 1.4168275862068964, "grad_norm": 3.4426558017730713, "learning_rate": 8.583901405417979e-06, "loss": 0.6928, "step": 5136 }, { "epoch": 1.417103448275862, "grad_norm": 4.253856182098389, "learning_rate": 8.583265200534097e-06, "loss": 0.7799, "step": 5137 }, { "epoch": 1.4173793103448276, "grad_norm": 4.243348121643066, "learning_rate": 8.582628876357675e-06, "loss": 0.7407, "step": 5138 }, { "epoch": 1.4176551724137931, "grad_norm": 4.008455276489258, "learning_rate": 8.581992432909896e-06, "loss": 0.7655, "step": 5139 }, { "epoch": 1.4179310344827587, "grad_norm": 3.916837453842163, "learning_rate": 8.581355870211949e-06, "loss": 0.7249, "step": 5140 }, { "epoch": 1.418206896551724, "grad_norm": 3.7899742126464844, "learning_rate": 8.580719188285025e-06, "loss": 0.8839, "step": 5141 }, { "epoch": 1.4184827586206896, "grad_norm": 3.7822070121765137, "learning_rate": 8.580082387150322e-06, "loss": 0.7082, "step": 5142 }, { "epoch": 1.4187586206896552, "grad_norm": 3.829902410507202, "learning_rate": 8.579445466829037e-06, "loss": 0.7738, "step": 5143 }, { "epoch": 1.4190344827586208, "grad_norm": 3.590743064880371, "learning_rate": 8.578808427342378e-06, "loss": 0.9284, "step": 5144 }, { "epoch": 1.4193103448275863, "grad_norm": 4.342691898345947, "learning_rate": 8.57817126871155e-06, "loss": 0.9118, "step": 5145 }, { "epoch": 1.4195862068965517, "grad_norm": 3.718991279602051, "learning_rate": 8.577533990957766e-06, "loss": 0.7111, "step": 5146 }, { "epoch": 1.4198620689655173, "grad_norm": 3.521301746368408, "learning_rate": 8.576896594102241e-06, "loss": 0.8682, "step": 5147 }, { "epoch": 1.4201379310344828, "grad_norm": 3.867446184158325, "learning_rate": 8.576259078166198e-06, "loss": 0.8532, "step": 5148 }, { "epoch": 1.4204137931034482, "grad_norm": 4.2427215576171875, "learning_rate": 8.575621443170859e-06, "loss": 0.7841, "step": 5149 }, { "epoch": 1.4206896551724137, "grad_norm": 3.876599073410034, "learning_rate": 8.574983689137453e-06, "loss": 0.7763, "step": 5150 }, { "epoch": 1.4209655172413793, "grad_norm": 4.344914436340332, "learning_rate": 8.574345816087208e-06, "loss": 0.883, "step": 5151 }, { "epoch": 1.4212413793103449, "grad_norm": 3.9744045734405518, "learning_rate": 8.573707824041364e-06, "loss": 0.837, "step": 5152 }, { "epoch": 1.4215172413793105, "grad_norm": 4.0683417320251465, "learning_rate": 8.573069713021159e-06, "loss": 0.8598, "step": 5153 }, { "epoch": 1.4217931034482758, "grad_norm": 4.07261848449707, "learning_rate": 8.572431483047836e-06, "loss": 0.8545, "step": 5154 }, { "epoch": 1.4220689655172414, "grad_norm": 4.216121673583984, "learning_rate": 8.571793134142643e-06, "loss": 0.8135, "step": 5155 }, { "epoch": 1.422344827586207, "grad_norm": 4.144855499267578, "learning_rate": 8.571154666326834e-06, "loss": 0.8206, "step": 5156 }, { "epoch": 1.4226206896551723, "grad_norm": 3.8271989822387695, "learning_rate": 8.570516079621662e-06, "loss": 0.7798, "step": 5157 }, { "epoch": 1.4228965517241379, "grad_norm": 3.9937381744384766, "learning_rate": 8.569877374048387e-06, "loss": 0.942, "step": 5158 }, { "epoch": 1.4231724137931034, "grad_norm": 3.464547872543335, "learning_rate": 8.569238549628273e-06, "loss": 0.6413, "step": 5159 }, { "epoch": 1.423448275862069, "grad_norm": 3.4017508029937744, "learning_rate": 8.568599606382588e-06, "loss": 0.7372, "step": 5160 }, { "epoch": 1.4237241379310346, "grad_norm": 4.060879230499268, "learning_rate": 8.567960544332604e-06, "loss": 0.8398, "step": 5161 }, { "epoch": 1.424, "grad_norm": 4.494071960449219, "learning_rate": 8.567321363499594e-06, "loss": 0.8444, "step": 5162 }, { "epoch": 1.4242758620689655, "grad_norm": 3.947373151779175, "learning_rate": 8.566682063904839e-06, "loss": 0.8171, "step": 5163 }, { "epoch": 1.424551724137931, "grad_norm": 3.7841529846191406, "learning_rate": 8.56604264556962e-06, "loss": 0.704, "step": 5164 }, { "epoch": 1.4248275862068964, "grad_norm": 4.065725326538086, "learning_rate": 8.565403108515228e-06, "loss": 0.647, "step": 5165 }, { "epoch": 1.425103448275862, "grad_norm": 3.8247361183166504, "learning_rate": 8.564763452762953e-06, "loss": 0.7803, "step": 5166 }, { "epoch": 1.4253793103448276, "grad_norm": 3.5528769493103027, "learning_rate": 8.564123678334087e-06, "loss": 0.7698, "step": 5167 }, { "epoch": 1.4256551724137931, "grad_norm": 4.40044641494751, "learning_rate": 8.56348378524993e-06, "loss": 0.7978, "step": 5168 }, { "epoch": 1.4259310344827587, "grad_norm": 3.968834638595581, "learning_rate": 8.56284377353179e-06, "loss": 0.7735, "step": 5169 }, { "epoch": 1.426206896551724, "grad_norm": 4.313694477081299, "learning_rate": 8.562203643200971e-06, "loss": 0.8819, "step": 5170 }, { "epoch": 1.4264827586206896, "grad_norm": 3.8943216800689697, "learning_rate": 8.561563394278782e-06, "loss": 0.807, "step": 5171 }, { "epoch": 1.4267586206896552, "grad_norm": 3.742889165878296, "learning_rate": 8.560923026786539e-06, "loss": 0.7251, "step": 5172 }, { "epoch": 1.4270344827586208, "grad_norm": 4.375545978546143, "learning_rate": 8.560282540745562e-06, "loss": 0.9401, "step": 5173 }, { "epoch": 1.4273103448275863, "grad_norm": 3.8610146045684814, "learning_rate": 8.559641936177172e-06, "loss": 0.7845, "step": 5174 }, { "epoch": 1.4275862068965517, "grad_norm": 3.794928789138794, "learning_rate": 8.559001213102698e-06, "loss": 0.7644, "step": 5175 }, { "epoch": 1.4278620689655173, "grad_norm": 3.8990113735198975, "learning_rate": 8.558360371543469e-06, "loss": 0.8091, "step": 5176 }, { "epoch": 1.4281379310344828, "grad_norm": 3.8820295333862305, "learning_rate": 8.55771941152082e-06, "loss": 0.7243, "step": 5177 }, { "epoch": 1.4284137931034482, "grad_norm": 3.878171920776367, "learning_rate": 8.557078333056089e-06, "loss": 0.7594, "step": 5178 }, { "epoch": 1.4286896551724138, "grad_norm": 4.1053690910339355, "learning_rate": 8.55643713617062e-06, "loss": 0.9473, "step": 5179 }, { "epoch": 1.4289655172413793, "grad_norm": 3.6989545822143555, "learning_rate": 8.55579582088576e-06, "loss": 0.7575, "step": 5180 }, { "epoch": 1.429241379310345, "grad_norm": 4.323761940002441, "learning_rate": 8.555154387222855e-06, "loss": 0.7407, "step": 5181 }, { "epoch": 1.4295172413793105, "grad_norm": 3.6967880725860596, "learning_rate": 8.554512835203263e-06, "loss": 0.7475, "step": 5182 }, { "epoch": 1.4297931034482758, "grad_norm": 4.618602752685547, "learning_rate": 8.553871164848343e-06, "loss": 0.8823, "step": 5183 }, { "epoch": 1.4300689655172414, "grad_norm": 4.061477184295654, "learning_rate": 8.553229376179455e-06, "loss": 0.8487, "step": 5184 }, { "epoch": 1.430344827586207, "grad_norm": 4.28924560546875, "learning_rate": 8.552587469217966e-06, "loss": 0.7633, "step": 5185 }, { "epoch": 1.4306206896551723, "grad_norm": 3.810835123062134, "learning_rate": 8.551945443985245e-06, "loss": 0.7475, "step": 5186 }, { "epoch": 1.4308965517241379, "grad_norm": 4.0556206703186035, "learning_rate": 8.551303300502669e-06, "loss": 0.9084, "step": 5187 }, { "epoch": 1.4311724137931034, "grad_norm": 3.738845109939575, "learning_rate": 8.550661038791613e-06, "loss": 0.7742, "step": 5188 }, { "epoch": 1.431448275862069, "grad_norm": 3.9554619789123535, "learning_rate": 8.55001865887346e-06, "loss": 0.719, "step": 5189 }, { "epoch": 1.4317241379310346, "grad_norm": 4.12440299987793, "learning_rate": 8.549376160769597e-06, "loss": 0.9121, "step": 5190 }, { "epoch": 1.432, "grad_norm": 3.9721927642822266, "learning_rate": 8.54873354450141e-06, "loss": 0.8632, "step": 5191 }, { "epoch": 1.4322758620689655, "grad_norm": 3.889692783355713, "learning_rate": 8.5480908100903e-06, "loss": 0.8847, "step": 5192 }, { "epoch": 1.432551724137931, "grad_norm": 3.890777111053467, "learning_rate": 8.547447957557656e-06, "loss": 0.9291, "step": 5193 }, { "epoch": 1.4328275862068964, "grad_norm": 4.516279220581055, "learning_rate": 8.546804986924884e-06, "loss": 0.9116, "step": 5194 }, { "epoch": 1.433103448275862, "grad_norm": 3.797240734100342, "learning_rate": 8.54616189821339e-06, "loss": 0.882, "step": 5195 }, { "epoch": 1.4333793103448276, "grad_norm": 3.7454357147216797, "learning_rate": 8.545518691444581e-06, "loss": 0.804, "step": 5196 }, { "epoch": 1.4336551724137931, "grad_norm": 3.534898519515991, "learning_rate": 8.544875366639873e-06, "loss": 0.9169, "step": 5197 }, { "epoch": 1.4339310344827587, "grad_norm": 3.9739091396331787, "learning_rate": 8.544231923820684e-06, "loss": 0.8237, "step": 5198 }, { "epoch": 1.434206896551724, "grad_norm": 3.6192033290863037, "learning_rate": 8.543588363008429e-06, "loss": 0.8711, "step": 5199 }, { "epoch": 1.4344827586206896, "grad_norm": 3.829702615737915, "learning_rate": 8.542944684224539e-06, "loss": 0.8697, "step": 5200 }, { "epoch": 1.4347586206896552, "grad_norm": 3.8600313663482666, "learning_rate": 8.542300887490442e-06, "loss": 0.7701, "step": 5201 }, { "epoch": 1.4350344827586206, "grad_norm": 3.8791048526763916, "learning_rate": 8.541656972827572e-06, "loss": 0.8079, "step": 5202 }, { "epoch": 1.4353103448275861, "grad_norm": 3.625732660293579, "learning_rate": 8.541012940257362e-06, "loss": 0.8127, "step": 5203 }, { "epoch": 1.4355862068965517, "grad_norm": 3.8073513507843018, "learning_rate": 8.540368789801258e-06, "loss": 0.7803, "step": 5204 }, { "epoch": 1.4358620689655173, "grad_norm": 3.7537450790405273, "learning_rate": 8.5397245214807e-06, "loss": 0.8425, "step": 5205 }, { "epoch": 1.4361379310344828, "grad_norm": 4.307269096374512, "learning_rate": 8.53908013531714e-06, "loss": 0.7312, "step": 5206 }, { "epoch": 1.4364137931034482, "grad_norm": 3.9604651927948, "learning_rate": 8.53843563133203e-06, "loss": 0.8811, "step": 5207 }, { "epoch": 1.4366896551724138, "grad_norm": 3.7521133422851562, "learning_rate": 8.537791009546826e-06, "loss": 0.7573, "step": 5208 }, { "epoch": 1.4369655172413793, "grad_norm": 4.297585964202881, "learning_rate": 8.537146269982986e-06, "loss": 0.8696, "step": 5209 }, { "epoch": 1.437241379310345, "grad_norm": 4.03660249710083, "learning_rate": 8.53650141266198e-06, "loss": 0.6808, "step": 5210 }, { "epoch": 1.4375172413793105, "grad_norm": 3.980076551437378, "learning_rate": 8.53585643760527e-06, "loss": 0.8121, "step": 5211 }, { "epoch": 1.4377931034482758, "grad_norm": 3.8805196285247803, "learning_rate": 8.535211344834334e-06, "loss": 0.868, "step": 5212 }, { "epoch": 1.4380689655172414, "grad_norm": 3.8895456790924072, "learning_rate": 8.534566134370644e-06, "loss": 0.8161, "step": 5213 }, { "epoch": 1.438344827586207, "grad_norm": 3.6841249465942383, "learning_rate": 8.533920806235683e-06, "loss": 0.7474, "step": 5214 }, { "epoch": 1.4386206896551723, "grad_norm": 3.619529962539673, "learning_rate": 8.533275360450934e-06, "loss": 0.7937, "step": 5215 }, { "epoch": 1.4388965517241379, "grad_norm": 3.8898096084594727, "learning_rate": 8.532629797037884e-06, "loss": 0.9178, "step": 5216 }, { "epoch": 1.4391724137931035, "grad_norm": 4.096401214599609, "learning_rate": 8.531984116018023e-06, "loss": 0.7855, "step": 5217 }, { "epoch": 1.439448275862069, "grad_norm": 4.087979793548584, "learning_rate": 8.531338317412852e-06, "loss": 0.8204, "step": 5218 }, { "epoch": 1.4397241379310346, "grad_norm": 3.7469799518585205, "learning_rate": 8.530692401243867e-06, "loss": 0.8511, "step": 5219 }, { "epoch": 1.44, "grad_norm": 3.999376058578491, "learning_rate": 8.530046367532573e-06, "loss": 0.7205, "step": 5220 }, { "epoch": 1.4402758620689655, "grad_norm": 3.9708614349365234, "learning_rate": 8.529400216300476e-06, "loss": 0.7812, "step": 5221 }, { "epoch": 1.440551724137931, "grad_norm": 4.6111626625061035, "learning_rate": 8.52875394756909e-06, "loss": 0.93, "step": 5222 }, { "epoch": 1.4408275862068964, "grad_norm": 3.5469679832458496, "learning_rate": 8.528107561359927e-06, "loss": 0.6751, "step": 5223 }, { "epoch": 1.441103448275862, "grad_norm": 3.814755916595459, "learning_rate": 8.527461057694508e-06, "loss": 0.8773, "step": 5224 }, { "epoch": 1.4413793103448276, "grad_norm": 4.3230462074279785, "learning_rate": 8.526814436594355e-06, "loss": 0.7878, "step": 5225 }, { "epoch": 1.4416551724137932, "grad_norm": 3.9078164100646973, "learning_rate": 8.526167698080997e-06, "loss": 0.9533, "step": 5226 }, { "epoch": 1.4419310344827587, "grad_norm": 4.212288856506348, "learning_rate": 8.525520842175963e-06, "loss": 0.8293, "step": 5227 }, { "epoch": 1.442206896551724, "grad_norm": 3.9727725982666016, "learning_rate": 8.52487386890079e-06, "loss": 1.0057, "step": 5228 }, { "epoch": 1.4424827586206896, "grad_norm": 3.966625452041626, "learning_rate": 8.524226778277014e-06, "loss": 0.8007, "step": 5229 }, { "epoch": 1.4427586206896552, "grad_norm": 3.666564464569092, "learning_rate": 8.523579570326179e-06, "loss": 0.7904, "step": 5230 }, { "epoch": 1.4430344827586206, "grad_norm": 3.679457187652588, "learning_rate": 8.522932245069834e-06, "loss": 0.6234, "step": 5231 }, { "epoch": 1.4433103448275861, "grad_norm": 3.6826391220092773, "learning_rate": 8.522284802529525e-06, "loss": 0.8572, "step": 5232 }, { "epoch": 1.4435862068965517, "grad_norm": 3.745872974395752, "learning_rate": 8.52163724272681e-06, "loss": 0.8195, "step": 5233 }, { "epoch": 1.4438620689655173, "grad_norm": 4.0012359619140625, "learning_rate": 8.520989565683244e-06, "loss": 0.8863, "step": 5234 }, { "epoch": 1.4441379310344828, "grad_norm": 4.449496269226074, "learning_rate": 8.520341771420392e-06, "loss": 0.805, "step": 5235 }, { "epoch": 1.4444137931034482, "grad_norm": 3.734663248062134, "learning_rate": 8.51969385995982e-06, "loss": 0.7749, "step": 5236 }, { "epoch": 1.4446896551724138, "grad_norm": 3.6825273036956787, "learning_rate": 8.519045831323096e-06, "loss": 0.7974, "step": 5237 }, { "epoch": 1.4449655172413793, "grad_norm": 3.8822333812713623, "learning_rate": 8.518397685531796e-06, "loss": 0.8085, "step": 5238 }, { "epoch": 1.445241379310345, "grad_norm": 3.9597253799438477, "learning_rate": 8.517749422607496e-06, "loss": 0.7632, "step": 5239 }, { "epoch": 1.4455172413793105, "grad_norm": 4.070823669433594, "learning_rate": 8.517101042571779e-06, "loss": 0.8586, "step": 5240 }, { "epoch": 1.4457931034482758, "grad_norm": 3.931299924850464, "learning_rate": 8.51645254544623e-06, "loss": 0.9448, "step": 5241 }, { "epoch": 1.4460689655172414, "grad_norm": 3.9688949584960938, "learning_rate": 8.515803931252438e-06, "loss": 0.9226, "step": 5242 }, { "epoch": 1.446344827586207, "grad_norm": 4.232664108276367, "learning_rate": 8.515155200011997e-06, "loss": 0.7461, "step": 5243 }, { "epoch": 1.4466206896551723, "grad_norm": 3.960383176803589, "learning_rate": 8.514506351746506e-06, "loss": 0.8406, "step": 5244 }, { "epoch": 1.446896551724138, "grad_norm": 3.690535068511963, "learning_rate": 8.513857386477562e-06, "loss": 0.749, "step": 5245 }, { "epoch": 1.4471724137931035, "grad_norm": 4.242927074432373, "learning_rate": 8.513208304226774e-06, "loss": 0.8576, "step": 5246 }, { "epoch": 1.447448275862069, "grad_norm": 4.0832624435424805, "learning_rate": 8.51255910501575e-06, "loss": 0.7452, "step": 5247 }, { "epoch": 1.4477241379310346, "grad_norm": 3.677644968032837, "learning_rate": 8.511909788866099e-06, "loss": 0.8592, "step": 5248 }, { "epoch": 1.448, "grad_norm": 4.676161766052246, "learning_rate": 8.511260355799444e-06, "loss": 0.9975, "step": 5249 }, { "epoch": 1.4482758620689655, "grad_norm": 3.8933300971984863, "learning_rate": 8.510610805837403e-06, "loss": 0.7661, "step": 5250 }, { "epoch": 1.448551724137931, "grad_norm": 4.2137675285339355, "learning_rate": 8.5099611390016e-06, "loss": 0.9627, "step": 5251 }, { "epoch": 1.4488275862068964, "grad_norm": 4.015585899353027, "learning_rate": 8.509311355313664e-06, "loss": 0.9086, "step": 5252 }, { "epoch": 1.449103448275862, "grad_norm": 4.031211853027344, "learning_rate": 8.508661454795226e-06, "loss": 0.8562, "step": 5253 }, { "epoch": 1.4493793103448276, "grad_norm": 3.5410454273223877, "learning_rate": 8.508011437467922e-06, "loss": 0.851, "step": 5254 }, { "epoch": 1.4496551724137932, "grad_norm": 3.8884685039520264, "learning_rate": 8.507361303353396e-06, "loss": 0.7535, "step": 5255 }, { "epoch": 1.4499310344827587, "grad_norm": 3.8535819053649902, "learning_rate": 8.506711052473287e-06, "loss": 0.7017, "step": 5256 }, { "epoch": 1.450206896551724, "grad_norm": 3.9159202575683594, "learning_rate": 8.506060684849247e-06, "loss": 0.6958, "step": 5257 }, { "epoch": 1.4504827586206896, "grad_norm": 3.918705940246582, "learning_rate": 8.505410200502926e-06, "loss": 0.7652, "step": 5258 }, { "epoch": 1.4507586206896552, "grad_norm": 4.306738376617432, "learning_rate": 8.504759599455979e-06, "loss": 0.7815, "step": 5259 }, { "epoch": 1.4510344827586206, "grad_norm": 4.208827972412109, "learning_rate": 8.504108881730067e-06, "loss": 0.8998, "step": 5260 }, { "epoch": 1.4513103448275861, "grad_norm": 3.896390199661255, "learning_rate": 8.50345804734685e-06, "loss": 0.8025, "step": 5261 }, { "epoch": 1.4515862068965517, "grad_norm": 4.022619724273682, "learning_rate": 8.502807096328e-06, "loss": 0.879, "step": 5262 }, { "epoch": 1.4518620689655173, "grad_norm": 3.779003858566284, "learning_rate": 8.502156028695187e-06, "loss": 0.6374, "step": 5263 }, { "epoch": 1.4521379310344829, "grad_norm": 3.9190518856048584, "learning_rate": 8.501504844470084e-06, "loss": 0.7794, "step": 5264 }, { "epoch": 1.4524137931034482, "grad_norm": 3.888758420944214, "learning_rate": 8.50085354367437e-06, "loss": 0.6652, "step": 5265 }, { "epoch": 1.4526896551724138, "grad_norm": 4.20842981338501, "learning_rate": 8.50020212632973e-06, "loss": 0.7989, "step": 5266 }, { "epoch": 1.4529655172413793, "grad_norm": 4.472293853759766, "learning_rate": 8.49955059245785e-06, "loss": 0.9726, "step": 5267 }, { "epoch": 1.4532413793103447, "grad_norm": 4.222445487976074, "learning_rate": 8.49889894208042e-06, "loss": 0.7948, "step": 5268 }, { "epoch": 1.4535172413793103, "grad_norm": 4.121272087097168, "learning_rate": 8.498247175219134e-06, "loss": 0.8021, "step": 5269 }, { "epoch": 1.4537931034482758, "grad_norm": 4.2080607414245605, "learning_rate": 8.497595291895693e-06, "loss": 0.8351, "step": 5270 }, { "epoch": 1.4540689655172414, "grad_norm": 3.920161247253418, "learning_rate": 8.496943292131795e-06, "loss": 0.8087, "step": 5271 }, { "epoch": 1.454344827586207, "grad_norm": 4.526780128479004, "learning_rate": 8.496291175949149e-06, "loss": 0.997, "step": 5272 }, { "epoch": 1.4546206896551723, "grad_norm": 3.687544822692871, "learning_rate": 8.495638943369465e-06, "loss": 0.7442, "step": 5273 }, { "epoch": 1.454896551724138, "grad_norm": 3.8861501216888428, "learning_rate": 8.494986594414455e-06, "loss": 0.8364, "step": 5274 }, { "epoch": 1.4551724137931035, "grad_norm": 4.053118705749512, "learning_rate": 8.494334129105838e-06, "loss": 0.9788, "step": 5275 }, { "epoch": 1.455448275862069, "grad_norm": 3.9529759883880615, "learning_rate": 8.493681547465337e-06, "loss": 0.8552, "step": 5276 }, { "epoch": 1.4557241379310346, "grad_norm": 3.8960602283477783, "learning_rate": 8.493028849514676e-06, "loss": 0.7891, "step": 5277 }, { "epoch": 1.456, "grad_norm": 3.716677188873291, "learning_rate": 8.492376035275583e-06, "loss": 0.7503, "step": 5278 }, { "epoch": 1.4562758620689655, "grad_norm": 3.9993324279785156, "learning_rate": 8.491723104769791e-06, "loss": 0.759, "step": 5279 }, { "epoch": 1.456551724137931, "grad_norm": 3.996655225753784, "learning_rate": 8.491070058019042e-06, "loss": 0.8577, "step": 5280 }, { "epoch": 1.4568275862068965, "grad_norm": 3.8309242725372314, "learning_rate": 8.49041689504507e-06, "loss": 0.7667, "step": 5281 }, { "epoch": 1.457103448275862, "grad_norm": 3.8128209114074707, "learning_rate": 8.489763615869626e-06, "loss": 0.7953, "step": 5282 }, { "epoch": 1.4573793103448276, "grad_norm": 4.1124982833862305, "learning_rate": 8.489110220514456e-06, "loss": 0.8287, "step": 5283 }, { "epoch": 1.4576551724137932, "grad_norm": 4.042701244354248, "learning_rate": 8.488456709001312e-06, "loss": 0.7917, "step": 5284 }, { "epoch": 1.4579310344827587, "grad_norm": 3.9943861961364746, "learning_rate": 8.487803081351951e-06, "loss": 0.9264, "step": 5285 }, { "epoch": 1.458206896551724, "grad_norm": 3.9502310752868652, "learning_rate": 8.487149337588132e-06, "loss": 0.8367, "step": 5286 }, { "epoch": 1.4584827586206897, "grad_norm": 3.8317599296569824, "learning_rate": 8.486495477731623e-06, "loss": 0.7144, "step": 5287 }, { "epoch": 1.4587586206896552, "grad_norm": 3.9118874073028564, "learning_rate": 8.485841501804188e-06, "loss": 0.8591, "step": 5288 }, { "epoch": 1.4590344827586206, "grad_norm": 4.259987831115723, "learning_rate": 8.4851874098276e-06, "loss": 0.8236, "step": 5289 }, { "epoch": 1.4593103448275861, "grad_norm": 4.036953449249268, "learning_rate": 8.484533201823637e-06, "loss": 0.7413, "step": 5290 }, { "epoch": 1.4595862068965517, "grad_norm": 4.066192626953125, "learning_rate": 8.483878877814075e-06, "loss": 0.8, "step": 5291 }, { "epoch": 1.4598620689655173, "grad_norm": 3.9888477325439453, "learning_rate": 8.483224437820701e-06, "loss": 0.8174, "step": 5292 }, { "epoch": 1.4601379310344829, "grad_norm": 4.275948524475098, "learning_rate": 8.4825698818653e-06, "loss": 0.8817, "step": 5293 }, { "epoch": 1.4604137931034482, "grad_norm": 3.54467511177063, "learning_rate": 8.481915209969663e-06, "loss": 0.7485, "step": 5294 }, { "epoch": 1.4606896551724138, "grad_norm": 3.719561815261841, "learning_rate": 8.481260422155587e-06, "loss": 0.702, "step": 5295 }, { "epoch": 1.4609655172413794, "grad_norm": 4.7935943603515625, "learning_rate": 8.480605518444869e-06, "loss": 0.9774, "step": 5296 }, { "epoch": 1.4612413793103447, "grad_norm": 4.575099945068359, "learning_rate": 8.479950498859312e-06, "loss": 0.8632, "step": 5297 }, { "epoch": 1.4615172413793103, "grad_norm": 3.5490405559539795, "learning_rate": 8.479295363420726e-06, "loss": 0.7774, "step": 5298 }, { "epoch": 1.4617931034482758, "grad_norm": 4.15098237991333, "learning_rate": 8.478640112150916e-06, "loss": 0.8705, "step": 5299 }, { "epoch": 1.4620689655172414, "grad_norm": 3.832658052444458, "learning_rate": 8.477984745071699e-06, "loss": 0.7012, "step": 5300 }, { "epoch": 1.462344827586207, "grad_norm": 3.887437582015991, "learning_rate": 8.477329262204894e-06, "loss": 0.7881, "step": 5301 }, { "epoch": 1.4626206896551723, "grad_norm": 4.2218475341796875, "learning_rate": 8.476673663572324e-06, "loss": 0.8992, "step": 5302 }, { "epoch": 1.462896551724138, "grad_norm": 3.647630453109741, "learning_rate": 8.47601794919581e-06, "loss": 0.8075, "step": 5303 }, { "epoch": 1.4631724137931035, "grad_norm": 3.9751980304718018, "learning_rate": 8.475362119097186e-06, "loss": 0.9087, "step": 5304 }, { "epoch": 1.463448275862069, "grad_norm": 3.7504377365112305, "learning_rate": 8.474706173298286e-06, "loss": 0.7658, "step": 5305 }, { "epoch": 1.4637241379310346, "grad_norm": 3.4112462997436523, "learning_rate": 8.474050111820943e-06, "loss": 0.708, "step": 5306 }, { "epoch": 1.464, "grad_norm": 3.9311721324920654, "learning_rate": 8.473393934687005e-06, "loss": 0.8519, "step": 5307 }, { "epoch": 1.4642758620689655, "grad_norm": 3.9275712966918945, "learning_rate": 8.472737641918312e-06, "loss": 0.895, "step": 5308 }, { "epoch": 1.464551724137931, "grad_norm": 3.9007365703582764, "learning_rate": 8.472081233536714e-06, "loss": 0.8175, "step": 5309 }, { "epoch": 1.4648275862068965, "grad_norm": 3.47953724861145, "learning_rate": 8.471424709564064e-06, "loss": 0.7543, "step": 5310 }, { "epoch": 1.465103448275862, "grad_norm": 4.080090045928955, "learning_rate": 8.470768070022221e-06, "loss": 0.707, "step": 5311 }, { "epoch": 1.4653793103448276, "grad_norm": 3.7875730991363525, "learning_rate": 8.470111314933042e-06, "loss": 0.778, "step": 5312 }, { "epoch": 1.4656551724137932, "grad_norm": 3.907137393951416, "learning_rate": 8.469454444318395e-06, "loss": 0.7021, "step": 5313 }, { "epoch": 1.4659310344827587, "grad_norm": 4.090037822723389, "learning_rate": 8.468797458200145e-06, "loss": 0.8158, "step": 5314 }, { "epoch": 1.466206896551724, "grad_norm": 3.720087766647339, "learning_rate": 8.468140356600165e-06, "loss": 0.8635, "step": 5315 }, { "epoch": 1.4664827586206897, "grad_norm": 3.4439170360565186, "learning_rate": 8.467483139540333e-06, "loss": 0.6789, "step": 5316 }, { "epoch": 1.4667586206896552, "grad_norm": 3.7475032806396484, "learning_rate": 8.466825807042526e-06, "loss": 0.7856, "step": 5317 }, { "epoch": 1.4670344827586206, "grad_norm": 3.9762704372406006, "learning_rate": 8.46616835912863e-06, "loss": 0.7062, "step": 5318 }, { "epoch": 1.4673103448275862, "grad_norm": 3.919717311859131, "learning_rate": 8.46551079582053e-06, "loss": 0.8137, "step": 5319 }, { "epoch": 1.4675862068965517, "grad_norm": 4.287310600280762, "learning_rate": 8.464853117140119e-06, "loss": 0.9212, "step": 5320 }, { "epoch": 1.4678620689655173, "grad_norm": 4.461577892303467, "learning_rate": 8.464195323109292e-06, "loss": 0.904, "step": 5321 }, { "epoch": 1.4681379310344829, "grad_norm": 3.718618392944336, "learning_rate": 8.463537413749947e-06, "loss": 0.6699, "step": 5322 }, { "epoch": 1.4684137931034482, "grad_norm": 3.623051404953003, "learning_rate": 8.462879389083989e-06, "loss": 0.6774, "step": 5323 }, { "epoch": 1.4686896551724138, "grad_norm": 3.9123589992523193, "learning_rate": 8.462221249133323e-06, "loss": 0.7766, "step": 5324 }, { "epoch": 1.4689655172413794, "grad_norm": 3.8933017253875732, "learning_rate": 8.461562993919858e-06, "loss": 0.7709, "step": 5325 }, { "epoch": 1.4692413793103447, "grad_norm": 4.079205513000488, "learning_rate": 8.460904623465514e-06, "loss": 0.8323, "step": 5326 }, { "epoch": 1.4695172413793103, "grad_norm": 4.612973690032959, "learning_rate": 8.460246137792201e-06, "loss": 0.9431, "step": 5327 }, { "epoch": 1.4697931034482759, "grad_norm": 4.147571563720703, "learning_rate": 8.45958753692185e-06, "loss": 0.7674, "step": 5328 }, { "epoch": 1.4700689655172414, "grad_norm": 4.700186252593994, "learning_rate": 8.458928820876378e-06, "loss": 0.9073, "step": 5329 }, { "epoch": 1.470344827586207, "grad_norm": 3.987443685531616, "learning_rate": 8.45826998967772e-06, "loss": 0.8261, "step": 5330 }, { "epoch": 1.4706206896551723, "grad_norm": 4.680233001708984, "learning_rate": 8.45761104334781e-06, "loss": 0.84, "step": 5331 }, { "epoch": 1.470896551724138, "grad_norm": 4.264634132385254, "learning_rate": 8.456951981908583e-06, "loss": 0.9223, "step": 5332 }, { "epoch": 1.4711724137931035, "grad_norm": 4.0322794914245605, "learning_rate": 8.456292805381981e-06, "loss": 0.7001, "step": 5333 }, { "epoch": 1.4714482758620688, "grad_norm": 3.9982399940490723, "learning_rate": 8.45563351378995e-06, "loss": 0.8827, "step": 5334 }, { "epoch": 1.4717241379310344, "grad_norm": 3.6965885162353516, "learning_rate": 8.454974107154437e-06, "loss": 0.7617, "step": 5335 }, { "epoch": 1.472, "grad_norm": 3.552163600921631, "learning_rate": 8.454314585497396e-06, "loss": 0.7952, "step": 5336 }, { "epoch": 1.4722758620689655, "grad_norm": 3.749000310897827, "learning_rate": 8.453654948840782e-06, "loss": 0.7723, "step": 5337 }, { "epoch": 1.4725517241379311, "grad_norm": 3.943549156188965, "learning_rate": 8.452995197206558e-06, "loss": 0.9079, "step": 5338 }, { "epoch": 1.4728275862068965, "grad_norm": 3.876549482345581, "learning_rate": 8.452335330616686e-06, "loss": 0.8626, "step": 5339 }, { "epoch": 1.473103448275862, "grad_norm": 4.250669956207275, "learning_rate": 8.451675349093134e-06, "loss": 1.0084, "step": 5340 }, { "epoch": 1.4733793103448276, "grad_norm": 3.92140793800354, "learning_rate": 8.451015252657875e-06, "loss": 0.6996, "step": 5341 }, { "epoch": 1.4736551724137932, "grad_norm": 4.09222412109375, "learning_rate": 8.450355041332883e-06, "loss": 0.821, "step": 5342 }, { "epoch": 1.4739310344827588, "grad_norm": 4.301405429840088, "learning_rate": 8.44969471514014e-06, "loss": 0.8042, "step": 5343 }, { "epoch": 1.474206896551724, "grad_norm": 3.6086535453796387, "learning_rate": 8.449034274101627e-06, "loss": 0.7538, "step": 5344 }, { "epoch": 1.4744827586206897, "grad_norm": 4.027694225311279, "learning_rate": 8.448373718239333e-06, "loss": 0.7817, "step": 5345 }, { "epoch": 1.4747586206896552, "grad_norm": 3.8807942867279053, "learning_rate": 8.447713047575248e-06, "loss": 0.6852, "step": 5346 }, { "epoch": 1.4750344827586206, "grad_norm": 3.8820056915283203, "learning_rate": 8.447052262131365e-06, "loss": 0.8068, "step": 5347 }, { "epoch": 1.4753103448275862, "grad_norm": 3.598330497741699, "learning_rate": 8.446391361929685e-06, "loss": 0.6952, "step": 5348 }, { "epoch": 1.4755862068965517, "grad_norm": 4.374686241149902, "learning_rate": 8.445730346992212e-06, "loss": 0.8582, "step": 5349 }, { "epoch": 1.4758620689655173, "grad_norm": 3.9727725982666016, "learning_rate": 8.445069217340946e-06, "loss": 0.8201, "step": 5350 }, { "epoch": 1.4761379310344829, "grad_norm": 3.9775664806365967, "learning_rate": 8.444407972997904e-06, "loss": 0.7992, "step": 5351 }, { "epoch": 1.4764137931034482, "grad_norm": 3.9304840564727783, "learning_rate": 8.443746613985096e-06, "loss": 0.8159, "step": 5352 }, { "epoch": 1.4766896551724138, "grad_norm": 4.276658535003662, "learning_rate": 8.443085140324541e-06, "loss": 0.7544, "step": 5353 }, { "epoch": 1.4769655172413794, "grad_norm": 4.1556477546691895, "learning_rate": 8.442423552038259e-06, "loss": 0.8857, "step": 5354 }, { "epoch": 1.4772413793103447, "grad_norm": 3.7991371154785156, "learning_rate": 8.441761849148276e-06, "loss": 0.8204, "step": 5355 }, { "epoch": 1.4775172413793103, "grad_norm": 3.9685187339782715, "learning_rate": 8.441100031676623e-06, "loss": 0.749, "step": 5356 }, { "epoch": 1.4777931034482759, "grad_norm": 3.7769722938537598, "learning_rate": 8.440438099645332e-06, "loss": 0.7364, "step": 5357 }, { "epoch": 1.4780689655172414, "grad_norm": 4.382603168487549, "learning_rate": 8.439776053076436e-06, "loss": 0.8655, "step": 5358 }, { "epoch": 1.478344827586207, "grad_norm": 4.141266822814941, "learning_rate": 8.439113891991981e-06, "loss": 0.7339, "step": 5359 }, { "epoch": 1.4786206896551723, "grad_norm": 4.156026840209961, "learning_rate": 8.438451616414009e-06, "loss": 0.8526, "step": 5360 }, { "epoch": 1.478896551724138, "grad_norm": 3.821057081222534, "learning_rate": 8.437789226364568e-06, "loss": 0.7677, "step": 5361 }, { "epoch": 1.4791724137931035, "grad_norm": 4.219390869140625, "learning_rate": 8.43712672186571e-06, "loss": 0.8727, "step": 5362 }, { "epoch": 1.4794482758620688, "grad_norm": 4.110195636749268, "learning_rate": 8.436464102939492e-06, "loss": 0.7721, "step": 5363 }, { "epoch": 1.4797241379310344, "grad_norm": 3.8593220710754395, "learning_rate": 8.435801369607972e-06, "loss": 0.79, "step": 5364 }, { "epoch": 1.48, "grad_norm": 4.14307975769043, "learning_rate": 8.435138521893215e-06, "loss": 0.8984, "step": 5365 }, { "epoch": 1.4802758620689656, "grad_norm": 3.8797495365142822, "learning_rate": 8.434475559817287e-06, "loss": 0.8834, "step": 5366 }, { "epoch": 1.4805517241379311, "grad_norm": 4.1963210105896, "learning_rate": 8.433812483402259e-06, "loss": 0.8197, "step": 5367 }, { "epoch": 1.4808275862068965, "grad_norm": 3.9467403888702393, "learning_rate": 8.433149292670207e-06, "loss": 0.834, "step": 5368 }, { "epoch": 1.481103448275862, "grad_norm": 3.8697118759155273, "learning_rate": 8.432485987643208e-06, "loss": 0.8358, "step": 5369 }, { "epoch": 1.4813793103448276, "grad_norm": 3.6920340061187744, "learning_rate": 8.431822568343349e-06, "loss": 0.7752, "step": 5370 }, { "epoch": 1.4816551724137932, "grad_norm": 3.6326076984405518, "learning_rate": 8.431159034792708e-06, "loss": 0.7854, "step": 5371 }, { "epoch": 1.4819310344827588, "grad_norm": 4.125669002532959, "learning_rate": 8.430495387013385e-06, "loss": 0.8583, "step": 5372 }, { "epoch": 1.482206896551724, "grad_norm": 3.546189069747925, "learning_rate": 8.429831625027465e-06, "loss": 0.7646, "step": 5373 }, { "epoch": 1.4824827586206897, "grad_norm": 4.036953926086426, "learning_rate": 8.42916774885705e-06, "loss": 0.7974, "step": 5374 }, { "epoch": 1.4827586206896552, "grad_norm": 4.2540812492370605, "learning_rate": 8.42850375852424e-06, "loss": 0.8632, "step": 5375 }, { "epoch": 1.4830344827586206, "grad_norm": 4.432225704193115, "learning_rate": 8.427839654051145e-06, "loss": 0.8496, "step": 5376 }, { "epoch": 1.4833103448275862, "grad_norm": 3.9164106845855713, "learning_rate": 8.427175435459866e-06, "loss": 0.8867, "step": 5377 }, { "epoch": 1.4835862068965517, "grad_norm": 3.871023178100586, "learning_rate": 8.426511102772522e-06, "loss": 0.7425, "step": 5378 }, { "epoch": 1.4838620689655173, "grad_norm": 4.253456115722656, "learning_rate": 8.425846656011228e-06, "loss": 0.7768, "step": 5379 }, { "epoch": 1.4841379310344829, "grad_norm": 3.754329204559326, "learning_rate": 8.425182095198103e-06, "loss": 0.8025, "step": 5380 }, { "epoch": 1.4844137931034482, "grad_norm": 4.2127580642700195, "learning_rate": 8.424517420355274e-06, "loss": 0.969, "step": 5381 }, { "epoch": 1.4846896551724138, "grad_norm": 3.87577223777771, "learning_rate": 8.423852631504868e-06, "loss": 0.7473, "step": 5382 }, { "epoch": 1.4849655172413794, "grad_norm": 4.0837788581848145, "learning_rate": 8.423187728669016e-06, "loss": 0.9665, "step": 5383 }, { "epoch": 1.4852413793103447, "grad_norm": 3.850766897201538, "learning_rate": 8.422522711869853e-06, "loss": 0.6988, "step": 5384 }, { "epoch": 1.4855172413793103, "grad_norm": 3.895137310028076, "learning_rate": 8.42185758112952e-06, "loss": 0.9505, "step": 5385 }, { "epoch": 1.4857931034482759, "grad_norm": 3.9684016704559326, "learning_rate": 8.42119233647016e-06, "loss": 0.7714, "step": 5386 }, { "epoch": 1.4860689655172414, "grad_norm": 4.260488510131836, "learning_rate": 8.420526977913921e-06, "loss": 0.8797, "step": 5387 }, { "epoch": 1.486344827586207, "grad_norm": 4.286162853240967, "learning_rate": 8.419861505482953e-06, "loss": 0.9708, "step": 5388 }, { "epoch": 1.4866206896551724, "grad_norm": 3.9879074096679688, "learning_rate": 8.41919591919941e-06, "loss": 0.77, "step": 5389 }, { "epoch": 1.486896551724138, "grad_norm": 4.619874954223633, "learning_rate": 8.41853021908545e-06, "loss": 0.9062, "step": 5390 }, { "epoch": 1.4871724137931035, "grad_norm": 3.668196201324463, "learning_rate": 8.417864405163236e-06, "loss": 0.7492, "step": 5391 }, { "epoch": 1.4874482758620688, "grad_norm": 4.698381423950195, "learning_rate": 8.417198477454935e-06, "loss": 0.8116, "step": 5392 }, { "epoch": 1.4877241379310344, "grad_norm": 4.349893569946289, "learning_rate": 8.416532435982716e-06, "loss": 0.9403, "step": 5393 }, { "epoch": 1.488, "grad_norm": 3.8012948036193848, "learning_rate": 8.415866280768749e-06, "loss": 0.8151, "step": 5394 }, { "epoch": 1.4882758620689656, "grad_norm": 4.106533050537109, "learning_rate": 8.415200011835219e-06, "loss": 0.8936, "step": 5395 }, { "epoch": 1.4885517241379311, "grad_norm": 3.911104679107666, "learning_rate": 8.4145336292043e-06, "loss": 0.8456, "step": 5396 }, { "epoch": 1.4888275862068965, "grad_norm": 3.82171893119812, "learning_rate": 8.413867132898183e-06, "loss": 0.7223, "step": 5397 }, { "epoch": 1.489103448275862, "grad_norm": 3.9750044345855713, "learning_rate": 8.41320052293905e-06, "loss": 0.9033, "step": 5398 }, { "epoch": 1.4893793103448276, "grad_norm": 3.510326385498047, "learning_rate": 8.4125337993491e-06, "loss": 0.7422, "step": 5399 }, { "epoch": 1.489655172413793, "grad_norm": 3.8297324180603027, "learning_rate": 8.411866962150524e-06, "loss": 0.8788, "step": 5400 }, { "epoch": 1.4899310344827585, "grad_norm": 3.648561477661133, "learning_rate": 8.411200011365525e-06, "loss": 0.7338, "step": 5401 }, { "epoch": 1.4902068965517241, "grad_norm": 3.807814359664917, "learning_rate": 8.410532947016307e-06, "loss": 0.8232, "step": 5402 }, { "epoch": 1.4904827586206897, "grad_norm": 4.376630783081055, "learning_rate": 8.409865769125076e-06, "loss": 1.0481, "step": 5403 }, { "epoch": 1.4907586206896553, "grad_norm": 3.9411685466766357, "learning_rate": 8.409198477714045e-06, "loss": 0.7379, "step": 5404 }, { "epoch": 1.4910344827586206, "grad_norm": 3.771491527557373, "learning_rate": 8.408531072805427e-06, "loss": 0.7367, "step": 5405 }, { "epoch": 1.4913103448275862, "grad_norm": 3.8985068798065186, "learning_rate": 8.407863554421443e-06, "loss": 0.8603, "step": 5406 }, { "epoch": 1.4915862068965517, "grad_norm": 3.7310450077056885, "learning_rate": 8.407195922584315e-06, "loss": 0.8597, "step": 5407 }, { "epoch": 1.4918620689655173, "grad_norm": 4.368929386138916, "learning_rate": 8.40652817731627e-06, "loss": 0.8367, "step": 5408 }, { "epoch": 1.492137931034483, "grad_norm": 3.8534836769104004, "learning_rate": 8.405860318639538e-06, "loss": 0.7419, "step": 5409 }, { "epoch": 1.4924137931034482, "grad_norm": 3.9306414127349854, "learning_rate": 8.405192346576351e-06, "loss": 0.7395, "step": 5410 }, { "epoch": 1.4926896551724138, "grad_norm": 3.9584157466888428, "learning_rate": 8.404524261148951e-06, "loss": 0.862, "step": 5411 }, { "epoch": 1.4929655172413794, "grad_norm": 3.468611001968384, "learning_rate": 8.403856062379576e-06, "loss": 0.8055, "step": 5412 }, { "epoch": 1.4932413793103447, "grad_norm": 3.8375301361083984, "learning_rate": 8.403187750290476e-06, "loss": 0.8251, "step": 5413 }, { "epoch": 1.4935172413793103, "grad_norm": 3.9836442470550537, "learning_rate": 8.402519324903893e-06, "loss": 0.8654, "step": 5414 }, { "epoch": 1.4937931034482759, "grad_norm": 3.8694705963134766, "learning_rate": 8.401850786242085e-06, "loss": 0.9172, "step": 5415 }, { "epoch": 1.4940689655172414, "grad_norm": 3.9644219875335693, "learning_rate": 8.401182134327309e-06, "loss": 0.7411, "step": 5416 }, { "epoch": 1.494344827586207, "grad_norm": 3.637699842453003, "learning_rate": 8.400513369181823e-06, "loss": 0.7866, "step": 5417 }, { "epoch": 1.4946206896551724, "grad_norm": 4.0698676109313965, "learning_rate": 8.399844490827892e-06, "loss": 0.7079, "step": 5418 }, { "epoch": 1.494896551724138, "grad_norm": 4.135048866271973, "learning_rate": 8.399175499287786e-06, "loss": 0.8884, "step": 5419 }, { "epoch": 1.4951724137931035, "grad_norm": 4.157135963439941, "learning_rate": 8.398506394583774e-06, "loss": 0.928, "step": 5420 }, { "epoch": 1.4954482758620689, "grad_norm": 4.087106704711914, "learning_rate": 8.397837176738131e-06, "loss": 0.8684, "step": 5421 }, { "epoch": 1.4957241379310344, "grad_norm": 3.7772116661071777, "learning_rate": 8.39716784577314e-06, "loss": 0.7345, "step": 5422 }, { "epoch": 1.496, "grad_norm": 3.7822775840759277, "learning_rate": 8.396498401711082e-06, "loss": 0.9824, "step": 5423 }, { "epoch": 1.4962758620689656, "grad_norm": 3.8373515605926514, "learning_rate": 8.395828844574243e-06, "loss": 0.8867, "step": 5424 }, { "epoch": 1.4965517241379311, "grad_norm": 3.9693799018859863, "learning_rate": 8.395159174384916e-06, "loss": 0.7928, "step": 5425 }, { "epoch": 1.4968275862068965, "grad_norm": 3.746102809906006, "learning_rate": 8.394489391165393e-06, "loss": 0.8352, "step": 5426 }, { "epoch": 1.497103448275862, "grad_norm": 3.8586299419403076, "learning_rate": 8.393819494937973e-06, "loss": 0.7668, "step": 5427 }, { "epoch": 1.4973793103448276, "grad_norm": 3.411092519760132, "learning_rate": 8.393149485724959e-06, "loss": 0.6762, "step": 5428 }, { "epoch": 1.497655172413793, "grad_norm": 4.061954498291016, "learning_rate": 8.392479363548654e-06, "loss": 0.9346, "step": 5429 }, { "epoch": 1.4979310344827585, "grad_norm": 4.3085784912109375, "learning_rate": 8.391809128431368e-06, "loss": 1.0006, "step": 5430 }, { "epoch": 1.4982068965517241, "grad_norm": 3.72737455368042, "learning_rate": 8.391138780395418e-06, "loss": 0.7981, "step": 5431 }, { "epoch": 1.4984827586206897, "grad_norm": 3.5781240463256836, "learning_rate": 8.390468319463117e-06, "loss": 0.7208, "step": 5432 }, { "epoch": 1.4987586206896553, "grad_norm": 4.264692783355713, "learning_rate": 8.389797745656787e-06, "loss": 0.8486, "step": 5433 }, { "epoch": 1.4990344827586206, "grad_norm": 4.110798358917236, "learning_rate": 8.389127058998751e-06, "loss": 0.8218, "step": 5434 }, { "epoch": 1.4993103448275862, "grad_norm": 3.7563211917877197, "learning_rate": 8.38845625951134e-06, "loss": 0.7897, "step": 5435 }, { "epoch": 1.4995862068965518, "grad_norm": 4.007946014404297, "learning_rate": 8.387785347216884e-06, "loss": 0.7885, "step": 5436 }, { "epoch": 1.499862068965517, "grad_norm": 3.9249777793884277, "learning_rate": 8.387114322137719e-06, "loss": 0.7788, "step": 5437 }, { "epoch": 1.500137931034483, "grad_norm": 3.8381295204162598, "learning_rate": 8.386443184296184e-06, "loss": 0.7238, "step": 5438 }, { "epoch": 1.5004137931034482, "grad_norm": 3.53072452545166, "learning_rate": 8.385771933714626e-06, "loss": 0.7305, "step": 5439 }, { "epoch": 1.5006896551724138, "grad_norm": 4.433712959289551, "learning_rate": 8.385100570415387e-06, "loss": 0.8616, "step": 5440 }, { "epoch": 1.5009655172413794, "grad_norm": 3.849081039428711, "learning_rate": 8.384429094420819e-06, "loss": 0.7752, "step": 5441 }, { "epoch": 1.5012413793103447, "grad_norm": 4.033702850341797, "learning_rate": 8.383757505753278e-06, "loss": 0.8094, "step": 5442 }, { "epoch": 1.5015172413793103, "grad_norm": 4.092620372772217, "learning_rate": 8.38308580443512e-06, "loss": 0.7775, "step": 5443 }, { "epoch": 1.5017931034482759, "grad_norm": 3.4967682361602783, "learning_rate": 8.38241399048871e-06, "loss": 0.7524, "step": 5444 }, { "epoch": 1.5020689655172412, "grad_norm": 4.106240749359131, "learning_rate": 8.381742063936411e-06, "loss": 0.8546, "step": 5445 }, { "epoch": 1.502344827586207, "grad_norm": 3.7330899238586426, "learning_rate": 8.381070024800594e-06, "loss": 0.7385, "step": 5446 }, { "epoch": 1.5026206896551724, "grad_norm": 4.408742427825928, "learning_rate": 8.380397873103634e-06, "loss": 0.8843, "step": 5447 }, { "epoch": 1.502896551724138, "grad_norm": 3.7606375217437744, "learning_rate": 8.379725608867903e-06, "loss": 0.8435, "step": 5448 }, { "epoch": 1.5031724137931035, "grad_norm": 3.793283224105835, "learning_rate": 8.379053232115787e-06, "loss": 0.8239, "step": 5449 }, { "epoch": 1.5034482758620689, "grad_norm": 4.270677089691162, "learning_rate": 8.378380742869665e-06, "loss": 0.8914, "step": 5450 }, { "epoch": 1.5037241379310344, "grad_norm": 4.062459945678711, "learning_rate": 8.37770814115193e-06, "loss": 0.7845, "step": 5451 }, { "epoch": 1.504, "grad_norm": 3.927238941192627, "learning_rate": 8.377035426984972e-06, "loss": 0.7835, "step": 5452 }, { "epoch": 1.5042758620689654, "grad_norm": 3.9499804973602295, "learning_rate": 8.376362600391188e-06, "loss": 0.8655, "step": 5453 }, { "epoch": 1.5045517241379311, "grad_norm": 4.290650367736816, "learning_rate": 8.375689661392975e-06, "loss": 0.8019, "step": 5454 }, { "epoch": 1.5048275862068965, "grad_norm": 3.976318359375, "learning_rate": 8.375016610012738e-06, "loss": 0.7674, "step": 5455 }, { "epoch": 1.505103448275862, "grad_norm": 3.755887985229492, "learning_rate": 8.374343446272884e-06, "loss": 0.7732, "step": 5456 }, { "epoch": 1.5053793103448276, "grad_norm": 3.863619327545166, "learning_rate": 8.373670170195825e-06, "loss": 0.7858, "step": 5457 }, { "epoch": 1.505655172413793, "grad_norm": 4.189598560333252, "learning_rate": 8.37299678180397e-06, "loss": 0.9197, "step": 5458 }, { "epoch": 1.5059310344827588, "grad_norm": 3.4779696464538574, "learning_rate": 8.372323281119745e-06, "loss": 0.6075, "step": 5459 }, { "epoch": 1.5062068965517241, "grad_norm": 4.393091201782227, "learning_rate": 8.371649668165564e-06, "loss": 0.7875, "step": 5460 }, { "epoch": 1.5064827586206897, "grad_norm": 4.172257900238037, "learning_rate": 8.370975942963858e-06, "loss": 0.8088, "step": 5461 }, { "epoch": 1.5067586206896553, "grad_norm": 4.453355312347412, "learning_rate": 8.370302105537053e-06, "loss": 0.9658, "step": 5462 }, { "epoch": 1.5070344827586206, "grad_norm": 4.252310752868652, "learning_rate": 8.369628155907588e-06, "loss": 0.8326, "step": 5463 }, { "epoch": 1.5073103448275862, "grad_norm": 3.611875534057617, "learning_rate": 8.368954094097891e-06, "loss": 0.6962, "step": 5464 }, { "epoch": 1.5075862068965518, "grad_norm": 4.004918575286865, "learning_rate": 8.368279920130412e-06, "loss": 0.8314, "step": 5465 }, { "epoch": 1.507862068965517, "grad_norm": 4.010768413543701, "learning_rate": 8.367605634027586e-06, "loss": 0.7117, "step": 5466 }, { "epoch": 1.508137931034483, "grad_norm": 3.7270150184631348, "learning_rate": 8.366931235811868e-06, "loss": 0.7701, "step": 5467 }, { "epoch": 1.5084137931034483, "grad_norm": 3.9255762100219727, "learning_rate": 8.366256725505708e-06, "loss": 0.8444, "step": 5468 }, { "epoch": 1.5086896551724138, "grad_norm": 3.4991440773010254, "learning_rate": 8.36558210313156e-06, "loss": 0.7628, "step": 5469 }, { "epoch": 1.5089655172413794, "grad_norm": 3.7310545444488525, "learning_rate": 8.364907368711884e-06, "loss": 0.8315, "step": 5470 }, { "epoch": 1.5092413793103447, "grad_norm": 3.520530939102173, "learning_rate": 8.364232522269144e-06, "loss": 0.7807, "step": 5471 }, { "epoch": 1.5095172413793103, "grad_norm": 3.534088373184204, "learning_rate": 8.363557563825806e-06, "loss": 0.7518, "step": 5472 }, { "epoch": 1.5097931034482759, "grad_norm": 3.838247537612915, "learning_rate": 8.362882493404339e-06, "loss": 0.8587, "step": 5473 }, { "epoch": 1.5100689655172412, "grad_norm": 4.5137224197387695, "learning_rate": 8.362207311027219e-06, "loss": 0.8549, "step": 5474 }, { "epoch": 1.510344827586207, "grad_norm": 4.131216526031494, "learning_rate": 8.361532016716922e-06, "loss": 0.8557, "step": 5475 }, { "epoch": 1.5106206896551724, "grad_norm": 4.051892280578613, "learning_rate": 8.360856610495933e-06, "loss": 0.8436, "step": 5476 }, { "epoch": 1.510896551724138, "grad_norm": 4.016448497772217, "learning_rate": 8.360181092386734e-06, "loss": 0.7986, "step": 5477 }, { "epoch": 1.5111724137931035, "grad_norm": 3.723371744155884, "learning_rate": 8.359505462411817e-06, "loss": 0.7868, "step": 5478 }, { "epoch": 1.5114482758620689, "grad_norm": 4.264772891998291, "learning_rate": 8.35882972059367e-06, "loss": 0.8663, "step": 5479 }, { "epoch": 1.5117241379310344, "grad_norm": 3.928318977355957, "learning_rate": 8.358153866954795e-06, "loss": 0.6549, "step": 5480 }, { "epoch": 1.512, "grad_norm": 3.8848140239715576, "learning_rate": 8.35747790151769e-06, "loss": 0.7178, "step": 5481 }, { "epoch": 1.5122758620689654, "grad_norm": 4.253180503845215, "learning_rate": 8.356801824304858e-06, "loss": 0.8078, "step": 5482 }, { "epoch": 1.5125517241379312, "grad_norm": 3.9500372409820557, "learning_rate": 8.356125635338808e-06, "loss": 0.7177, "step": 5483 }, { "epoch": 1.5128275862068965, "grad_norm": 3.902482032775879, "learning_rate": 8.35544933464205e-06, "loss": 0.9049, "step": 5484 }, { "epoch": 1.513103448275862, "grad_norm": 4.070252418518066, "learning_rate": 8.3547729222371e-06, "loss": 0.8059, "step": 5485 }, { "epoch": 1.5133793103448276, "grad_norm": 3.737725257873535, "learning_rate": 8.354096398146479e-06, "loss": 0.8497, "step": 5486 }, { "epoch": 1.513655172413793, "grad_norm": 3.7587504386901855, "learning_rate": 8.353419762392705e-06, "loss": 0.7791, "step": 5487 }, { "epoch": 1.5139310344827588, "grad_norm": 3.9153177738189697, "learning_rate": 8.352743014998307e-06, "loss": 0.8822, "step": 5488 }, { "epoch": 1.5142068965517241, "grad_norm": 4.012758731842041, "learning_rate": 8.352066155985816e-06, "loss": 0.7735, "step": 5489 }, { "epoch": 1.5144827586206897, "grad_norm": 3.3633241653442383, "learning_rate": 8.351389185377763e-06, "loss": 0.6855, "step": 5490 }, { "epoch": 1.5147586206896553, "grad_norm": 3.9216790199279785, "learning_rate": 8.350712103196688e-06, "loss": 0.8932, "step": 5491 }, { "epoch": 1.5150344827586206, "grad_norm": 3.8477401733398438, "learning_rate": 8.35003490946513e-06, "loss": 0.7726, "step": 5492 }, { "epoch": 1.5153103448275862, "grad_norm": 3.801349639892578, "learning_rate": 8.349357604205634e-06, "loss": 0.8477, "step": 5493 }, { "epoch": 1.5155862068965518, "grad_norm": 4.5500617027282715, "learning_rate": 8.34868018744075e-06, "loss": 0.7943, "step": 5494 }, { "epoch": 1.5158620689655171, "grad_norm": 3.4310295581817627, "learning_rate": 8.34800265919303e-06, "loss": 0.7578, "step": 5495 }, { "epoch": 1.516137931034483, "grad_norm": 3.90484881401062, "learning_rate": 8.347325019485028e-06, "loss": 0.875, "step": 5496 }, { "epoch": 1.5164137931034483, "grad_norm": 3.549379348754883, "learning_rate": 8.346647268339307e-06, "loss": 0.7164, "step": 5497 }, { "epoch": 1.5166896551724138, "grad_norm": 3.7673134803771973, "learning_rate": 8.34596940577843e-06, "loss": 0.7665, "step": 5498 }, { "epoch": 1.5169655172413794, "grad_norm": 4.226138591766357, "learning_rate": 8.345291431824959e-06, "loss": 0.8553, "step": 5499 }, { "epoch": 1.5172413793103448, "grad_norm": 3.7492706775665283, "learning_rate": 8.344613346501472e-06, "loss": 0.8742, "step": 5500 }, { "epoch": 1.5172413793103448, "eval_loss": 1.283280372619629, "eval_runtime": 13.6176, "eval_samples_per_second": 29.374, "eval_steps_per_second": 3.672, "step": 5500 }, { "epoch": 1.5175172413793103, "grad_norm": 4.059056758880615, "learning_rate": 8.343935149830537e-06, "loss": 0.7483, "step": 5501 }, { "epoch": 1.517793103448276, "grad_norm": 3.817758798599243, "learning_rate": 8.343256841834736e-06, "loss": 0.7958, "step": 5502 }, { "epoch": 1.5180689655172412, "grad_norm": 3.8887557983398438, "learning_rate": 8.342578422536652e-06, "loss": 0.8019, "step": 5503 }, { "epoch": 1.518344827586207, "grad_norm": 3.863804340362549, "learning_rate": 8.341899891958866e-06, "loss": 0.7501, "step": 5504 }, { "epoch": 1.5186206896551724, "grad_norm": 4.295115947723389, "learning_rate": 8.341221250123974e-06, "loss": 0.8545, "step": 5505 }, { "epoch": 1.518896551724138, "grad_norm": 4.275903701782227, "learning_rate": 8.340542497054564e-06, "loss": 0.933, "step": 5506 }, { "epoch": 1.5191724137931035, "grad_norm": 3.641462802886963, "learning_rate": 8.339863632773234e-06, "loss": 0.8544, "step": 5507 }, { "epoch": 1.5194482758620689, "grad_norm": 3.5639328956604004, "learning_rate": 8.339184657302584e-06, "loss": 0.7927, "step": 5508 }, { "epoch": 1.5197241379310344, "grad_norm": 4.167909622192383, "learning_rate": 8.338505570665218e-06, "loss": 0.9073, "step": 5509 }, { "epoch": 1.52, "grad_norm": 3.9131476879119873, "learning_rate": 8.337826372883745e-06, "loss": 0.7937, "step": 5510 }, { "epoch": 1.5202758620689654, "grad_norm": 4.266839981079102, "learning_rate": 8.337147063980778e-06, "loss": 0.8204, "step": 5511 }, { "epoch": 1.5205517241379312, "grad_norm": 3.940765142440796, "learning_rate": 8.33646764397893e-06, "loss": 0.8503, "step": 5512 }, { "epoch": 1.5208275862068965, "grad_norm": 4.224430561065674, "learning_rate": 8.33578811290082e-06, "loss": 0.8091, "step": 5513 }, { "epoch": 1.521103448275862, "grad_norm": 4.003821849822998, "learning_rate": 8.335108470769068e-06, "loss": 0.6902, "step": 5514 }, { "epoch": 1.5213793103448277, "grad_norm": 3.870776414871216, "learning_rate": 8.334428717606307e-06, "loss": 0.8614, "step": 5515 }, { "epoch": 1.521655172413793, "grad_norm": 3.8876614570617676, "learning_rate": 8.333748853435162e-06, "loss": 0.8994, "step": 5516 }, { "epoch": 1.5219310344827586, "grad_norm": 4.035717010498047, "learning_rate": 8.33306887827827e-06, "loss": 0.7953, "step": 5517 }, { "epoch": 1.5222068965517241, "grad_norm": 3.72544527053833, "learning_rate": 8.332388792158264e-06, "loss": 0.6882, "step": 5518 }, { "epoch": 1.5224827586206895, "grad_norm": 3.8860650062561035, "learning_rate": 8.33170859509779e-06, "loss": 0.8248, "step": 5519 }, { "epoch": 1.5227586206896553, "grad_norm": 3.743302583694458, "learning_rate": 8.331028287119489e-06, "loss": 0.958, "step": 5520 }, { "epoch": 1.5230344827586206, "grad_norm": 3.350512981414795, "learning_rate": 8.33034786824601e-06, "loss": 0.7121, "step": 5521 }, { "epoch": 1.5233103448275862, "grad_norm": 3.8878910541534424, "learning_rate": 8.32966733850001e-06, "loss": 0.6388, "step": 5522 }, { "epoch": 1.5235862068965518, "grad_norm": 4.074061393737793, "learning_rate": 8.328986697904138e-06, "loss": 0.8722, "step": 5523 }, { "epoch": 1.5238620689655171, "grad_norm": 4.13018798828125, "learning_rate": 8.32830594648106e-06, "loss": 0.823, "step": 5524 }, { "epoch": 1.524137931034483, "grad_norm": 3.946455240249634, "learning_rate": 8.327625084253432e-06, "loss": 0.794, "step": 5525 }, { "epoch": 1.5244137931034483, "grad_norm": 3.386155843734741, "learning_rate": 8.326944111243929e-06, "loss": 0.6918, "step": 5526 }, { "epoch": 1.5246896551724138, "grad_norm": 4.091860294342041, "learning_rate": 8.326263027475213e-06, "loss": 0.8613, "step": 5527 }, { "epoch": 1.5249655172413794, "grad_norm": 3.8335843086242676, "learning_rate": 8.325581832969968e-06, "loss": 0.7374, "step": 5528 }, { "epoch": 1.5252413793103448, "grad_norm": 3.8435192108154297, "learning_rate": 8.324900527750865e-06, "loss": 0.7969, "step": 5529 }, { "epoch": 1.5255172413793103, "grad_norm": 3.9412145614624023, "learning_rate": 8.324219111840588e-06, "loss": 0.7202, "step": 5530 }, { "epoch": 1.525793103448276, "grad_norm": 4.1942548751831055, "learning_rate": 8.323537585261823e-06, "loss": 0.7721, "step": 5531 }, { "epoch": 1.5260689655172412, "grad_norm": 4.011327266693115, "learning_rate": 8.322855948037259e-06, "loss": 0.8639, "step": 5532 }, { "epoch": 1.526344827586207, "grad_norm": 3.738898992538452, "learning_rate": 8.322174200189585e-06, "loss": 0.8309, "step": 5533 }, { "epoch": 1.5266206896551724, "grad_norm": 4.073704242706299, "learning_rate": 8.321492341741504e-06, "loss": 0.9321, "step": 5534 }, { "epoch": 1.526896551724138, "grad_norm": 4.4019775390625, "learning_rate": 8.32081037271571e-06, "loss": 0.8155, "step": 5535 }, { "epoch": 1.5271724137931035, "grad_norm": 4.103716850280762, "learning_rate": 8.320128293134913e-06, "loss": 0.9082, "step": 5536 }, { "epoch": 1.5274482758620689, "grad_norm": 3.942220449447632, "learning_rate": 8.319446103021813e-06, "loss": 0.7929, "step": 5537 }, { "epoch": 1.5277241379310345, "grad_norm": 3.9910829067230225, "learning_rate": 8.318763802399128e-06, "loss": 0.7663, "step": 5538 }, { "epoch": 1.528, "grad_norm": 4.141204357147217, "learning_rate": 8.31808139128957e-06, "loss": 0.8214, "step": 5539 }, { "epoch": 1.5282758620689654, "grad_norm": 3.765986442565918, "learning_rate": 8.317398869715857e-06, "loss": 0.8822, "step": 5540 }, { "epoch": 1.5285517241379312, "grad_norm": 4.519794940948486, "learning_rate": 8.316716237700711e-06, "loss": 0.784, "step": 5541 }, { "epoch": 1.5288275862068965, "grad_norm": 3.748011350631714, "learning_rate": 8.31603349526686e-06, "loss": 0.8444, "step": 5542 }, { "epoch": 1.529103448275862, "grad_norm": 3.4761850833892822, "learning_rate": 8.315350642437033e-06, "loss": 0.8044, "step": 5543 }, { "epoch": 1.5293793103448277, "grad_norm": 3.6878864765167236, "learning_rate": 8.314667679233962e-06, "loss": 0.716, "step": 5544 }, { "epoch": 1.529655172413793, "grad_norm": 4.161773681640625, "learning_rate": 8.313984605680384e-06, "loss": 0.8692, "step": 5545 }, { "epoch": 1.5299310344827586, "grad_norm": 3.8938169479370117, "learning_rate": 8.31330142179904e-06, "loss": 0.7529, "step": 5546 }, { "epoch": 1.5302068965517241, "grad_norm": 4.017953872680664, "learning_rate": 8.312618127612674e-06, "loss": 0.8028, "step": 5547 }, { "epoch": 1.5304827586206895, "grad_norm": 3.7932324409484863, "learning_rate": 8.311934723144037e-06, "loss": 0.8887, "step": 5548 }, { "epoch": 1.5307586206896553, "grad_norm": 3.8094048500061035, "learning_rate": 8.311251208415876e-06, "loss": 0.7411, "step": 5549 }, { "epoch": 1.5310344827586206, "grad_norm": 3.6967129707336426, "learning_rate": 8.310567583450949e-06, "loss": 0.8404, "step": 5550 }, { "epoch": 1.5313103448275862, "grad_norm": 3.7944118976593018, "learning_rate": 8.30988384827201e-06, "loss": 0.6747, "step": 5551 }, { "epoch": 1.5315862068965518, "grad_norm": 4.643133163452148, "learning_rate": 8.309200002901832e-06, "loss": 0.6836, "step": 5552 }, { "epoch": 1.5318620689655171, "grad_norm": 4.007950782775879, "learning_rate": 8.308516047363171e-06, "loss": 0.8769, "step": 5553 }, { "epoch": 1.532137931034483, "grad_norm": 3.6137259006500244, "learning_rate": 8.307831981678802e-06, "loss": 0.7197, "step": 5554 }, { "epoch": 1.5324137931034483, "grad_norm": 3.840085983276367, "learning_rate": 8.307147805871499e-06, "loss": 0.7598, "step": 5555 }, { "epoch": 1.5326896551724138, "grad_norm": 4.1111321449279785, "learning_rate": 8.306463519964036e-06, "loss": 0.8636, "step": 5556 }, { "epoch": 1.5329655172413794, "grad_norm": 3.995420455932617, "learning_rate": 8.305779123979196e-06, "loss": 0.7885, "step": 5557 }, { "epoch": 1.5332413793103448, "grad_norm": 3.6378087997436523, "learning_rate": 8.305094617939766e-06, "loss": 0.8088, "step": 5558 }, { "epoch": 1.5335172413793103, "grad_norm": 4.061079978942871, "learning_rate": 8.30441000186853e-06, "loss": 0.8625, "step": 5559 }, { "epoch": 1.533793103448276, "grad_norm": 3.9086685180664062, "learning_rate": 8.303725275788281e-06, "loss": 0.7772, "step": 5560 }, { "epoch": 1.5340689655172413, "grad_norm": 3.571702003479004, "learning_rate": 8.303040439721818e-06, "loss": 0.7564, "step": 5561 }, { "epoch": 1.534344827586207, "grad_norm": 4.315351486206055, "learning_rate": 8.302355493691934e-06, "loss": 0.8001, "step": 5562 }, { "epoch": 1.5346206896551724, "grad_norm": 4.094563007354736, "learning_rate": 8.301670437721438e-06, "loss": 0.7498, "step": 5563 }, { "epoch": 1.534896551724138, "grad_norm": 3.8502442836761475, "learning_rate": 8.300985271833133e-06, "loss": 0.8732, "step": 5564 }, { "epoch": 1.5351724137931035, "grad_norm": 4.051810264587402, "learning_rate": 8.300299996049832e-06, "loss": 0.7751, "step": 5565 }, { "epoch": 1.535448275862069, "grad_norm": 4.169382095336914, "learning_rate": 8.299614610394345e-06, "loss": 0.9731, "step": 5566 }, { "epoch": 1.5357241379310345, "grad_norm": 4.5278425216674805, "learning_rate": 8.298929114889493e-06, "loss": 0.914, "step": 5567 }, { "epoch": 1.536, "grad_norm": 4.180009841918945, "learning_rate": 8.298243509558098e-06, "loss": 0.8385, "step": 5568 }, { "epoch": 1.5362758620689654, "grad_norm": 4.096649646759033, "learning_rate": 8.29755779442298e-06, "loss": 0.9778, "step": 5569 }, { "epoch": 1.5365517241379312, "grad_norm": 4.01125431060791, "learning_rate": 8.296871969506972e-06, "loss": 0.7881, "step": 5570 }, { "epoch": 1.5368275862068965, "grad_norm": 3.861898899078369, "learning_rate": 8.296186034832906e-06, "loss": 0.7684, "step": 5571 }, { "epoch": 1.537103448275862, "grad_norm": 3.9552886486053467, "learning_rate": 8.295499990423614e-06, "loss": 0.8124, "step": 5572 }, { "epoch": 1.5373793103448277, "grad_norm": 3.6623470783233643, "learning_rate": 8.294813836301939e-06, "loss": 0.6677, "step": 5573 }, { "epoch": 1.537655172413793, "grad_norm": 3.9252471923828125, "learning_rate": 8.294127572490722e-06, "loss": 0.7285, "step": 5574 }, { "epoch": 1.5379310344827586, "grad_norm": 3.6518237590789795, "learning_rate": 8.293441199012813e-06, "loss": 0.922, "step": 5575 }, { "epoch": 1.5382068965517242, "grad_norm": 3.776658773422241, "learning_rate": 8.29275471589106e-06, "loss": 0.7034, "step": 5576 }, { "epoch": 1.5384827586206895, "grad_norm": 4.14267110824585, "learning_rate": 8.292068123148318e-06, "loss": 0.8759, "step": 5577 }, { "epoch": 1.5387586206896553, "grad_norm": 4.266589641571045, "learning_rate": 8.291381420807443e-06, "loss": 0.8506, "step": 5578 }, { "epoch": 1.5390344827586206, "grad_norm": 4.39073371887207, "learning_rate": 8.290694608891298e-06, "loss": 0.7379, "step": 5579 }, { "epoch": 1.5393103448275862, "grad_norm": 4.243650436401367, "learning_rate": 8.290007687422749e-06, "loss": 0.8219, "step": 5580 }, { "epoch": 1.5395862068965518, "grad_norm": 3.7559075355529785, "learning_rate": 8.289320656424664e-06, "loss": 0.8292, "step": 5581 }, { "epoch": 1.5398620689655171, "grad_norm": 3.930753469467163, "learning_rate": 8.288633515919912e-06, "loss": 0.7159, "step": 5582 }, { "epoch": 1.5401379310344827, "grad_norm": 3.803767204284668, "learning_rate": 8.287946265931374e-06, "loss": 0.7325, "step": 5583 }, { "epoch": 1.5404137931034483, "grad_norm": 4.2278265953063965, "learning_rate": 8.287258906481927e-06, "loss": 0.7085, "step": 5584 }, { "epoch": 1.5406896551724136, "grad_norm": 4.195034027099609, "learning_rate": 8.286571437594455e-06, "loss": 0.8634, "step": 5585 }, { "epoch": 1.5409655172413794, "grad_norm": 4.182888984680176, "learning_rate": 8.285883859291843e-06, "loss": 0.8554, "step": 5586 }, { "epoch": 1.5412413793103448, "grad_norm": 3.8308653831481934, "learning_rate": 8.285196171596985e-06, "loss": 0.83, "step": 5587 }, { "epoch": 1.5415172413793103, "grad_norm": 4.077643394470215, "learning_rate": 8.284508374532772e-06, "loss": 0.7585, "step": 5588 }, { "epoch": 1.541793103448276, "grad_norm": 4.495038986206055, "learning_rate": 8.283820468122104e-06, "loss": 0.9269, "step": 5589 }, { "epoch": 1.5420689655172413, "grad_norm": 3.647531032562256, "learning_rate": 8.283132452387883e-06, "loss": 0.6281, "step": 5590 }, { "epoch": 1.542344827586207, "grad_norm": 3.675588846206665, "learning_rate": 8.282444327353012e-06, "loss": 0.8281, "step": 5591 }, { "epoch": 1.5426206896551724, "grad_norm": 4.154388427734375, "learning_rate": 8.2817560930404e-06, "loss": 0.843, "step": 5592 }, { "epoch": 1.542896551724138, "grad_norm": 3.7761363983154297, "learning_rate": 8.28106774947296e-06, "loss": 0.7781, "step": 5593 }, { "epoch": 1.5431724137931035, "grad_norm": 3.490161180496216, "learning_rate": 8.280379296673609e-06, "loss": 0.8563, "step": 5594 }, { "epoch": 1.543448275862069, "grad_norm": 4.238533973693848, "learning_rate": 8.279690734665264e-06, "loss": 0.8416, "step": 5595 }, { "epoch": 1.5437241379310345, "grad_norm": 3.940300703048706, "learning_rate": 8.279002063470852e-06, "loss": 0.8226, "step": 5596 }, { "epoch": 1.544, "grad_norm": 3.583282709121704, "learning_rate": 8.278313283113297e-06, "loss": 0.6866, "step": 5597 }, { "epoch": 1.5442758620689654, "grad_norm": 3.7996373176574707, "learning_rate": 8.27762439361553e-06, "loss": 0.8078, "step": 5598 }, { "epoch": 1.5445517241379312, "grad_norm": 3.90152907371521, "learning_rate": 8.276935395000489e-06, "loss": 0.7977, "step": 5599 }, { "epoch": 1.5448275862068965, "grad_norm": 4.2053542137146, "learning_rate": 8.276246287291107e-06, "loss": 0.8177, "step": 5600 }, { "epoch": 1.545103448275862, "grad_norm": 3.978181838989258, "learning_rate": 8.275557070510324e-06, "loss": 0.7943, "step": 5601 }, { "epoch": 1.5453793103448277, "grad_norm": 4.110058784484863, "learning_rate": 8.274867744681092e-06, "loss": 0.7932, "step": 5602 }, { "epoch": 1.545655172413793, "grad_norm": 3.9818990230560303, "learning_rate": 8.274178309826355e-06, "loss": 0.7804, "step": 5603 }, { "epoch": 1.5459310344827586, "grad_norm": 3.628688335418701, "learning_rate": 8.273488765969067e-06, "loss": 0.8064, "step": 5604 }, { "epoch": 1.5462068965517242, "grad_norm": 3.885662794113159, "learning_rate": 8.272799113132182e-06, "loss": 0.7431, "step": 5605 }, { "epoch": 1.5464827586206895, "grad_norm": 3.8343045711517334, "learning_rate": 8.272109351338661e-06, "loss": 0.7178, "step": 5606 }, { "epoch": 1.5467586206896553, "grad_norm": 3.7673981189727783, "learning_rate": 8.271419480611468e-06, "loss": 0.6595, "step": 5607 }, { "epoch": 1.5470344827586207, "grad_norm": 4.356435298919678, "learning_rate": 8.27072950097357e-06, "loss": 0.8159, "step": 5608 }, { "epoch": 1.5473103448275862, "grad_norm": 4.461798191070557, "learning_rate": 8.270039412447937e-06, "loss": 0.8398, "step": 5609 }, { "epoch": 1.5475862068965518, "grad_norm": 3.935072183609009, "learning_rate": 8.26934921505754e-06, "loss": 0.85, "step": 5610 }, { "epoch": 1.5478620689655171, "grad_norm": 4.412812232971191, "learning_rate": 8.268658908825362e-06, "loss": 0.8681, "step": 5611 }, { "epoch": 1.5481379310344827, "grad_norm": 4.007415294647217, "learning_rate": 8.26796849377438e-06, "loss": 0.7871, "step": 5612 }, { "epoch": 1.5484137931034483, "grad_norm": 3.7225699424743652, "learning_rate": 8.267277969927584e-06, "loss": 0.8626, "step": 5613 }, { "epoch": 1.5486896551724136, "grad_norm": 4.0716047286987305, "learning_rate": 8.266587337307957e-06, "loss": 0.8563, "step": 5614 }, { "epoch": 1.5489655172413794, "grad_norm": 3.987919569015503, "learning_rate": 8.265896595938494e-06, "loss": 0.793, "step": 5615 }, { "epoch": 1.5492413793103448, "grad_norm": 3.836747646331787, "learning_rate": 8.26520574584219e-06, "loss": 0.7871, "step": 5616 }, { "epoch": 1.5495172413793104, "grad_norm": 3.5119388103485107, "learning_rate": 8.264514787042047e-06, "loss": 0.7438, "step": 5617 }, { "epoch": 1.549793103448276, "grad_norm": 3.8610832691192627, "learning_rate": 8.263823719561067e-06, "loss": 0.7248, "step": 5618 }, { "epoch": 1.5500689655172413, "grad_norm": 3.810814619064331, "learning_rate": 8.263132543422252e-06, "loss": 0.7374, "step": 5619 }, { "epoch": 1.5503448275862068, "grad_norm": 4.078950881958008, "learning_rate": 8.26244125864862e-06, "loss": 0.8685, "step": 5620 }, { "epoch": 1.5506206896551724, "grad_norm": 3.575896978378296, "learning_rate": 8.26174986526318e-06, "loss": 0.8218, "step": 5621 }, { "epoch": 1.550896551724138, "grad_norm": 3.722102403640747, "learning_rate": 8.261058363288951e-06, "loss": 0.7109, "step": 5622 }, { "epoch": 1.5511724137931036, "grad_norm": 4.011275768280029, "learning_rate": 8.260366752748954e-06, "loss": 0.8962, "step": 5623 }, { "epoch": 1.551448275862069, "grad_norm": 4.373693943023682, "learning_rate": 8.259675033666215e-06, "loss": 0.8185, "step": 5624 }, { "epoch": 1.5517241379310345, "grad_norm": 3.829171895980835, "learning_rate": 8.25898320606376e-06, "loss": 0.791, "step": 5625 }, { "epoch": 1.552, "grad_norm": 4.212989330291748, "learning_rate": 8.258291269964622e-06, "loss": 0.8087, "step": 5626 }, { "epoch": 1.5522758620689654, "grad_norm": 3.5833284854888916, "learning_rate": 8.257599225391836e-06, "loss": 0.7027, "step": 5627 }, { "epoch": 1.5525517241379312, "grad_norm": 3.948181629180908, "learning_rate": 8.256907072368445e-06, "loss": 0.8842, "step": 5628 }, { "epoch": 1.5528275862068965, "grad_norm": 3.9706573486328125, "learning_rate": 8.256214810917487e-06, "loss": 0.7215, "step": 5629 }, { "epoch": 1.553103448275862, "grad_norm": 3.631509304046631, "learning_rate": 8.255522441062012e-06, "loss": 0.7892, "step": 5630 }, { "epoch": 1.5533793103448277, "grad_norm": 4.204546928405762, "learning_rate": 8.254829962825069e-06, "loss": 0.8938, "step": 5631 }, { "epoch": 1.553655172413793, "grad_norm": 4.0836501121521, "learning_rate": 8.25413737622971e-06, "loss": 0.8263, "step": 5632 }, { "epoch": 1.5539310344827586, "grad_norm": 3.7361016273498535, "learning_rate": 8.253444681298993e-06, "loss": 0.7496, "step": 5633 }, { "epoch": 1.5542068965517242, "grad_norm": 3.473843812942505, "learning_rate": 8.25275187805598e-06, "loss": 0.7606, "step": 5634 }, { "epoch": 1.5544827586206895, "grad_norm": 4.006436347961426, "learning_rate": 8.252058966523738e-06, "loss": 0.7649, "step": 5635 }, { "epoch": 1.5547586206896553, "grad_norm": 4.150306224822998, "learning_rate": 8.251365946725326e-06, "loss": 0.7952, "step": 5636 }, { "epoch": 1.5550344827586207, "grad_norm": 3.5408143997192383, "learning_rate": 8.250672818683827e-06, "loss": 0.8481, "step": 5637 }, { "epoch": 1.5553103448275862, "grad_norm": 3.793224334716797, "learning_rate": 8.249979582422309e-06, "loss": 0.8434, "step": 5638 }, { "epoch": 1.5555862068965518, "grad_norm": 4.09500789642334, "learning_rate": 8.249286237963855e-06, "loss": 0.8607, "step": 5639 }, { "epoch": 1.5558620689655172, "grad_norm": 3.685434103012085, "learning_rate": 8.248592785331544e-06, "loss": 0.7123, "step": 5640 }, { "epoch": 1.5561379310344827, "grad_norm": 3.9291207790374756, "learning_rate": 8.247899224548463e-06, "loss": 0.7803, "step": 5641 }, { "epoch": 1.5564137931034483, "grad_norm": 3.669658660888672, "learning_rate": 8.247205555637704e-06, "loss": 0.7948, "step": 5642 }, { "epoch": 1.5566896551724136, "grad_norm": 4.181669235229492, "learning_rate": 8.24651177862236e-06, "loss": 0.8895, "step": 5643 }, { "epoch": 1.5569655172413794, "grad_norm": 3.9746758937835693, "learning_rate": 8.245817893525525e-06, "loss": 0.8125, "step": 5644 }, { "epoch": 1.5572413793103448, "grad_norm": 4.466091156005859, "learning_rate": 8.245123900370302e-06, "loss": 0.9978, "step": 5645 }, { "epoch": 1.5575172413793104, "grad_norm": 3.780461549758911, "learning_rate": 8.244429799179793e-06, "loss": 0.7864, "step": 5646 }, { "epoch": 1.557793103448276, "grad_norm": 3.638866424560547, "learning_rate": 8.243735589977109e-06, "loss": 0.824, "step": 5647 }, { "epoch": 1.5580689655172413, "grad_norm": 3.5779595375061035, "learning_rate": 8.243041272785359e-06, "loss": 0.8099, "step": 5648 }, { "epoch": 1.5583448275862068, "grad_norm": 4.096468925476074, "learning_rate": 8.242346847627657e-06, "loss": 0.9008, "step": 5649 }, { "epoch": 1.5586206896551724, "grad_norm": 4.174032688140869, "learning_rate": 8.241652314527126e-06, "loss": 0.8574, "step": 5650 }, { "epoch": 1.5588965517241378, "grad_norm": 4.189253807067871, "learning_rate": 8.240957673506881e-06, "loss": 0.7876, "step": 5651 }, { "epoch": 1.5591724137931036, "grad_norm": 3.9539523124694824, "learning_rate": 8.240262924590054e-06, "loss": 0.7474, "step": 5652 }, { "epoch": 1.559448275862069, "grad_norm": 3.7355213165283203, "learning_rate": 8.239568067799772e-06, "loss": 0.7384, "step": 5653 }, { "epoch": 1.5597241379310345, "grad_norm": 3.6607465744018555, "learning_rate": 8.238873103159168e-06, "loss": 0.8492, "step": 5654 }, { "epoch": 1.56, "grad_norm": 3.6170034408569336, "learning_rate": 8.238178030691377e-06, "loss": 0.6828, "step": 5655 }, { "epoch": 1.5602758620689654, "grad_norm": 4.031854629516602, "learning_rate": 8.237482850419542e-06, "loss": 0.8086, "step": 5656 }, { "epoch": 1.5605517241379312, "grad_norm": 3.8783152103424072, "learning_rate": 8.236787562366803e-06, "loss": 0.7034, "step": 5657 }, { "epoch": 1.5608275862068965, "grad_norm": 3.5705552101135254, "learning_rate": 8.23609216655631e-06, "loss": 0.8021, "step": 5658 }, { "epoch": 1.5611034482758621, "grad_norm": 4.466143608093262, "learning_rate": 8.235396663011214e-06, "loss": 0.9293, "step": 5659 }, { "epoch": 1.5613793103448277, "grad_norm": 3.845841646194458, "learning_rate": 8.234701051754669e-06, "loss": 0.6673, "step": 5660 }, { "epoch": 1.561655172413793, "grad_norm": 3.7443158626556396, "learning_rate": 8.234005332809831e-06, "loss": 0.7534, "step": 5661 }, { "epoch": 1.5619310344827586, "grad_norm": 3.70154070854187, "learning_rate": 8.233309506199862e-06, "loss": 0.7535, "step": 5662 }, { "epoch": 1.5622068965517242, "grad_norm": 3.923002243041992, "learning_rate": 8.23261357194793e-06, "loss": 0.8285, "step": 5663 }, { "epoch": 1.5624827586206895, "grad_norm": 3.7065589427948, "learning_rate": 8.231917530077203e-06, "loss": 0.7817, "step": 5664 }, { "epoch": 1.5627586206896553, "grad_norm": 3.6964263916015625, "learning_rate": 8.23122138061085e-06, "loss": 0.7179, "step": 5665 }, { "epoch": 1.5630344827586207, "grad_norm": 4.136791706085205, "learning_rate": 8.230525123572051e-06, "loss": 0.772, "step": 5666 }, { "epoch": 1.5633103448275862, "grad_norm": 4.437556266784668, "learning_rate": 8.229828758983983e-06, "loss": 0.9931, "step": 5667 }, { "epoch": 1.5635862068965518, "grad_norm": 4.406495094299316, "learning_rate": 8.229132286869828e-06, "loss": 0.9167, "step": 5668 }, { "epoch": 1.5638620689655172, "grad_norm": 3.5524821281433105, "learning_rate": 8.228435707252777e-06, "loss": 0.7748, "step": 5669 }, { "epoch": 1.5641379310344827, "grad_norm": 4.073645114898682, "learning_rate": 8.227739020156016e-06, "loss": 0.8284, "step": 5670 }, { "epoch": 1.5644137931034483, "grad_norm": 3.4896371364593506, "learning_rate": 8.227042225602742e-06, "loss": 0.6655, "step": 5671 }, { "epoch": 1.5646896551724137, "grad_norm": 3.812636375427246, "learning_rate": 8.226345323616148e-06, "loss": 0.8179, "step": 5672 }, { "epoch": 1.5649655172413794, "grad_norm": 3.7629494667053223, "learning_rate": 8.225648314219441e-06, "loss": 0.9808, "step": 5673 }, { "epoch": 1.5652413793103448, "grad_norm": 3.746706485748291, "learning_rate": 8.22495119743582e-06, "loss": 0.835, "step": 5674 }, { "epoch": 1.5655172413793104, "grad_norm": 3.9412646293640137, "learning_rate": 8.224253973288499e-06, "loss": 0.7466, "step": 5675 }, { "epoch": 1.565793103448276, "grad_norm": 3.860402822494507, "learning_rate": 8.223556641800683e-06, "loss": 0.7484, "step": 5676 }, { "epoch": 1.5660689655172413, "grad_norm": 4.104809761047363, "learning_rate": 8.22285920299559e-06, "loss": 0.8307, "step": 5677 }, { "epoch": 1.5663448275862069, "grad_norm": 3.9212863445281982, "learning_rate": 8.22216165689644e-06, "loss": 0.8222, "step": 5678 }, { "epoch": 1.5666206896551724, "grad_norm": 4.230958938598633, "learning_rate": 8.221464003526456e-06, "loss": 0.8742, "step": 5679 }, { "epoch": 1.5668965517241378, "grad_norm": 3.685093402862549, "learning_rate": 8.220766242908862e-06, "loss": 0.8701, "step": 5680 }, { "epoch": 1.5671724137931036, "grad_norm": 3.662602186203003, "learning_rate": 8.220068375066887e-06, "loss": 0.7061, "step": 5681 }, { "epoch": 1.567448275862069, "grad_norm": 3.5779271125793457, "learning_rate": 8.219370400023767e-06, "loss": 0.831, "step": 5682 }, { "epoch": 1.5677241379310345, "grad_norm": 3.7158591747283936, "learning_rate": 8.218672317802737e-06, "loss": 0.7812, "step": 5683 }, { "epoch": 1.568, "grad_norm": 3.792431592941284, "learning_rate": 8.217974128427036e-06, "loss": 0.7092, "step": 5684 }, { "epoch": 1.5682758620689654, "grad_norm": 3.625243663787842, "learning_rate": 8.21727583191991e-06, "loss": 0.7316, "step": 5685 }, { "epoch": 1.568551724137931, "grad_norm": 3.7506840229034424, "learning_rate": 8.216577428304604e-06, "loss": 0.7718, "step": 5686 }, { "epoch": 1.5688275862068966, "grad_norm": 4.17539644241333, "learning_rate": 8.215878917604372e-06, "loss": 0.7714, "step": 5687 }, { "epoch": 1.5691034482758621, "grad_norm": 4.194620609283447, "learning_rate": 8.215180299842466e-06, "loss": 0.7419, "step": 5688 }, { "epoch": 1.5693793103448277, "grad_norm": 3.813096761703491, "learning_rate": 8.214481575042145e-06, "loss": 0.7269, "step": 5689 }, { "epoch": 1.569655172413793, "grad_norm": 3.4653236865997314, "learning_rate": 8.213782743226671e-06, "loss": 0.8214, "step": 5690 }, { "epoch": 1.5699310344827586, "grad_norm": 4.233172416687012, "learning_rate": 8.21308380441931e-06, "loss": 0.8531, "step": 5691 }, { "epoch": 1.5702068965517242, "grad_norm": 4.196225643157959, "learning_rate": 8.212384758643329e-06, "loss": 0.8324, "step": 5692 }, { "epoch": 1.5704827586206895, "grad_norm": 4.315441608428955, "learning_rate": 8.211685605922e-06, "loss": 0.9446, "step": 5693 }, { "epoch": 1.5707586206896553, "grad_norm": 3.613068103790283, "learning_rate": 8.2109863462786e-06, "loss": 0.7926, "step": 5694 }, { "epoch": 1.5710344827586207, "grad_norm": 4.420473575592041, "learning_rate": 8.21028697973641e-06, "loss": 0.9196, "step": 5695 }, { "epoch": 1.5713103448275862, "grad_norm": 3.61003041267395, "learning_rate": 8.209587506318711e-06, "loss": 0.826, "step": 5696 }, { "epoch": 1.5715862068965518, "grad_norm": 5.273193359375, "learning_rate": 8.208887926048788e-06, "loss": 0.7791, "step": 5697 }, { "epoch": 1.5718620689655172, "grad_norm": 3.83422589302063, "learning_rate": 8.208188238949935e-06, "loss": 0.8836, "step": 5698 }, { "epoch": 1.5721379310344827, "grad_norm": 3.8298532962799072, "learning_rate": 8.207488445045443e-06, "loss": 0.8421, "step": 5699 }, { "epoch": 1.5724137931034483, "grad_norm": 3.8152289390563965, "learning_rate": 8.20678854435861e-06, "loss": 0.9344, "step": 5700 }, { "epoch": 1.5726896551724137, "grad_norm": 3.45721173286438, "learning_rate": 8.206088536912736e-06, "loss": 0.6917, "step": 5701 }, { "epoch": 1.5729655172413795, "grad_norm": 3.7395009994506836, "learning_rate": 8.205388422731128e-06, "loss": 0.6853, "step": 5702 }, { "epoch": 1.5732413793103448, "grad_norm": 4.008810520172119, "learning_rate": 8.20468820183709e-06, "loss": 0.8409, "step": 5703 }, { "epoch": 1.5735172413793104, "grad_norm": 4.051943778991699, "learning_rate": 8.203987874253938e-06, "loss": 0.9021, "step": 5704 }, { "epoch": 1.573793103448276, "grad_norm": 4.829927444458008, "learning_rate": 8.203287440004982e-06, "loss": 0.8499, "step": 5705 }, { "epoch": 1.5740689655172413, "grad_norm": 3.8523154258728027, "learning_rate": 8.202586899113546e-06, "loss": 0.7016, "step": 5706 }, { "epoch": 1.5743448275862069, "grad_norm": 3.7507495880126953, "learning_rate": 8.201886251602946e-06, "loss": 0.7152, "step": 5707 }, { "epoch": 1.5746206896551724, "grad_norm": 3.628671407699585, "learning_rate": 8.201185497496512e-06, "loss": 0.7534, "step": 5708 }, { "epoch": 1.5748965517241378, "grad_norm": 3.6800947189331055, "learning_rate": 8.200484636817571e-06, "loss": 0.6838, "step": 5709 }, { "epoch": 1.5751724137931036, "grad_norm": 4.0290632247924805, "learning_rate": 8.19978366958946e-06, "loss": 0.8989, "step": 5710 }, { "epoch": 1.575448275862069, "grad_norm": 3.9537525177001953, "learning_rate": 8.19908259583551e-06, "loss": 0.8781, "step": 5711 }, { "epoch": 1.5757241379310345, "grad_norm": 4.219615936279297, "learning_rate": 8.198381415579062e-06, "loss": 0.9698, "step": 5712 }, { "epoch": 1.576, "grad_norm": 3.929001808166504, "learning_rate": 8.197680128843462e-06, "loss": 0.9207, "step": 5713 }, { "epoch": 1.5762758620689654, "grad_norm": 4.156789779663086, "learning_rate": 8.196978735652055e-06, "loss": 0.9046, "step": 5714 }, { "epoch": 1.576551724137931, "grad_norm": 3.8217387199401855, "learning_rate": 8.196277236028192e-06, "loss": 0.7609, "step": 5715 }, { "epoch": 1.5768275862068966, "grad_norm": 3.5364153385162354, "learning_rate": 8.195575629995227e-06, "loss": 0.7579, "step": 5716 }, { "epoch": 1.577103448275862, "grad_norm": 4.011916160583496, "learning_rate": 8.194873917576515e-06, "loss": 0.7938, "step": 5717 }, { "epoch": 1.5773793103448277, "grad_norm": 3.7996037006378174, "learning_rate": 8.194172098795421e-06, "loss": 0.9031, "step": 5718 }, { "epoch": 1.577655172413793, "grad_norm": 3.841081142425537, "learning_rate": 8.193470173675309e-06, "loss": 0.9968, "step": 5719 }, { "epoch": 1.5779310344827586, "grad_norm": 3.731135845184326, "learning_rate": 8.192768142239546e-06, "loss": 0.8798, "step": 5720 }, { "epoch": 1.5782068965517242, "grad_norm": 4.018504619598389, "learning_rate": 8.192066004511504e-06, "loss": 0.7761, "step": 5721 }, { "epoch": 1.5784827586206895, "grad_norm": 3.6828343868255615, "learning_rate": 8.191363760514557e-06, "loss": 0.7844, "step": 5722 }, { "epoch": 1.5787586206896553, "grad_norm": 3.9830002784729004, "learning_rate": 8.190661410272086e-06, "loss": 0.8718, "step": 5723 }, { "epoch": 1.5790344827586207, "grad_norm": 3.7874534130096436, "learning_rate": 8.189958953807472e-06, "loss": 0.9042, "step": 5724 }, { "epoch": 1.5793103448275863, "grad_norm": 3.7495954036712646, "learning_rate": 8.189256391144102e-06, "loss": 0.7042, "step": 5725 }, { "epoch": 1.5795862068965518, "grad_norm": 3.6861608028411865, "learning_rate": 8.188553722305365e-06, "loss": 0.7671, "step": 5726 }, { "epoch": 1.5798620689655172, "grad_norm": 3.7885618209838867, "learning_rate": 8.187850947314653e-06, "loss": 0.9108, "step": 5727 }, { "epoch": 1.5801379310344827, "grad_norm": 4.282500743865967, "learning_rate": 8.187148066195364e-06, "loss": 0.8854, "step": 5728 }, { "epoch": 1.5804137931034483, "grad_norm": 4.498382091522217, "learning_rate": 8.186445078970897e-06, "loss": 0.726, "step": 5729 }, { "epoch": 1.5806896551724137, "grad_norm": 3.6129586696624756, "learning_rate": 8.185741985664655e-06, "loss": 0.7202, "step": 5730 }, { "epoch": 1.5809655172413795, "grad_norm": 4.5223846435546875, "learning_rate": 8.185038786300047e-06, "loss": 0.7842, "step": 5731 }, { "epoch": 1.5812413793103448, "grad_norm": 3.778284788131714, "learning_rate": 8.184335480900482e-06, "loss": 0.8297, "step": 5732 }, { "epoch": 1.5815172413793104, "grad_norm": 3.854522943496704, "learning_rate": 8.183632069489375e-06, "loss": 0.9347, "step": 5733 }, { "epoch": 1.581793103448276, "grad_norm": 3.5048182010650635, "learning_rate": 8.182928552090143e-06, "loss": 0.5857, "step": 5734 }, { "epoch": 1.5820689655172413, "grad_norm": 4.071353435516357, "learning_rate": 8.182224928726207e-06, "loss": 0.7996, "step": 5735 }, { "epoch": 1.5823448275862069, "grad_norm": 3.947462320327759, "learning_rate": 8.181521199420994e-06, "loss": 0.7805, "step": 5736 }, { "epoch": 1.5826206896551724, "grad_norm": 3.9037468433380127, "learning_rate": 8.180817364197927e-06, "loss": 0.7804, "step": 5737 }, { "epoch": 1.5828965517241378, "grad_norm": 4.236401081085205, "learning_rate": 8.180113423080445e-06, "loss": 0.8642, "step": 5738 }, { "epoch": 1.5831724137931036, "grad_norm": 3.7353572845458984, "learning_rate": 8.179409376091978e-06, "loss": 0.7025, "step": 5739 }, { "epoch": 1.583448275862069, "grad_norm": 4.331631660461426, "learning_rate": 8.178705223255968e-06, "loss": 0.8658, "step": 5740 }, { "epoch": 1.5837241379310345, "grad_norm": 4.163148880004883, "learning_rate": 8.178000964595854e-06, "loss": 0.7774, "step": 5741 }, { "epoch": 1.584, "grad_norm": 3.7838172912597656, "learning_rate": 8.177296600135082e-06, "loss": 0.7628, "step": 5742 }, { "epoch": 1.5842758620689654, "grad_norm": 4.179683685302734, "learning_rate": 8.176592129897106e-06, "loss": 0.8788, "step": 5743 }, { "epoch": 1.584551724137931, "grad_norm": 3.8612289428710938, "learning_rate": 8.175887553905375e-06, "loss": 0.8191, "step": 5744 }, { "epoch": 1.5848275862068966, "grad_norm": 4.147598743438721, "learning_rate": 8.175182872183346e-06, "loss": 0.7323, "step": 5745 }, { "epoch": 1.585103448275862, "grad_norm": 3.713747262954712, "learning_rate": 8.17447808475448e-06, "loss": 0.7914, "step": 5746 }, { "epoch": 1.5853793103448277, "grad_norm": 3.560713768005371, "learning_rate": 8.17377319164224e-06, "loss": 0.7507, "step": 5747 }, { "epoch": 1.585655172413793, "grad_norm": 3.8940672874450684, "learning_rate": 8.173068192870092e-06, "loss": 0.9338, "step": 5748 }, { "epoch": 1.5859310344827586, "grad_norm": 3.776406764984131, "learning_rate": 8.172363088461507e-06, "loss": 0.7224, "step": 5749 }, { "epoch": 1.5862068965517242, "grad_norm": 4.285373210906982, "learning_rate": 8.17165787843996e-06, "loss": 0.7599, "step": 5750 }, { "epoch": 1.5864827586206895, "grad_norm": 3.548036813735962, "learning_rate": 8.17095256282893e-06, "loss": 0.7428, "step": 5751 }, { "epoch": 1.5867586206896551, "grad_norm": 4.019909858703613, "learning_rate": 8.170247141651893e-06, "loss": 0.753, "step": 5752 }, { "epoch": 1.5870344827586207, "grad_norm": 4.014415264129639, "learning_rate": 8.169541614932337e-06, "loss": 0.8096, "step": 5753 }, { "epoch": 1.5873103448275863, "grad_norm": 4.051895618438721, "learning_rate": 8.168835982693752e-06, "loss": 0.6776, "step": 5754 }, { "epoch": 1.5875862068965518, "grad_norm": 3.899624824523926, "learning_rate": 8.168130244959627e-06, "loss": 0.8791, "step": 5755 }, { "epoch": 1.5878620689655172, "grad_norm": 3.959832191467285, "learning_rate": 8.167424401753456e-06, "loss": 0.8227, "step": 5756 }, { "epoch": 1.5881379310344828, "grad_norm": 4.035165309906006, "learning_rate": 8.166718453098739e-06, "loss": 0.8502, "step": 5757 }, { "epoch": 1.5884137931034483, "grad_norm": 3.7706081867218018, "learning_rate": 8.166012399018979e-06, "loss": 0.8679, "step": 5758 }, { "epoch": 1.5886896551724137, "grad_norm": 4.240692615509033, "learning_rate": 8.165306239537681e-06, "loss": 0.7919, "step": 5759 }, { "epoch": 1.5889655172413795, "grad_norm": 4.150822162628174, "learning_rate": 8.164599974678353e-06, "loss": 0.8456, "step": 5760 }, { "epoch": 1.5892413793103448, "grad_norm": 4.345170497894287, "learning_rate": 8.16389360446451e-06, "loss": 0.948, "step": 5761 }, { "epoch": 1.5895172413793104, "grad_norm": 4.281360626220703, "learning_rate": 8.163187128919666e-06, "loss": 0.8423, "step": 5762 }, { "epoch": 1.589793103448276, "grad_norm": 3.8724253177642822, "learning_rate": 8.162480548067341e-06, "loss": 0.7147, "step": 5763 }, { "epoch": 1.5900689655172413, "grad_norm": 4.135367393493652, "learning_rate": 8.16177386193106e-06, "loss": 0.7917, "step": 5764 }, { "epoch": 1.5903448275862069, "grad_norm": 3.9598495960235596, "learning_rate": 8.161067070534348e-06, "loss": 0.8795, "step": 5765 }, { "epoch": 1.5906206896551724, "grad_norm": 4.097618103027344, "learning_rate": 8.160360173900736e-06, "loss": 0.8408, "step": 5766 }, { "epoch": 1.5908965517241378, "grad_norm": 4.356884002685547, "learning_rate": 8.159653172053758e-06, "loss": 0.8471, "step": 5767 }, { "epoch": 1.5911724137931036, "grad_norm": 4.212899208068848, "learning_rate": 8.15894606501695e-06, "loss": 0.8424, "step": 5768 }, { "epoch": 1.591448275862069, "grad_norm": 4.261407852172852, "learning_rate": 8.15823885281385e-06, "loss": 0.7698, "step": 5769 }, { "epoch": 1.5917241379310345, "grad_norm": 4.0180816650390625, "learning_rate": 8.157531535468011e-06, "loss": 0.8221, "step": 5770 }, { "epoch": 1.592, "grad_norm": 4.428717613220215, "learning_rate": 8.156824113002972e-06, "loss": 0.7701, "step": 5771 }, { "epoch": 1.5922758620689654, "grad_norm": 4.851956844329834, "learning_rate": 8.156116585442288e-06, "loss": 0.93, "step": 5772 }, { "epoch": 1.592551724137931, "grad_norm": 4.1476149559021, "learning_rate": 8.155408952809512e-06, "loss": 0.8647, "step": 5773 }, { "epoch": 1.5928275862068966, "grad_norm": 4.0359954833984375, "learning_rate": 8.154701215128204e-06, "loss": 0.8342, "step": 5774 }, { "epoch": 1.593103448275862, "grad_norm": 4.68930196762085, "learning_rate": 8.153993372421926e-06, "loss": 0.7898, "step": 5775 }, { "epoch": 1.5933793103448277, "grad_norm": 3.6926560401916504, "learning_rate": 8.153285424714242e-06, "loss": 0.7913, "step": 5776 }, { "epoch": 1.593655172413793, "grad_norm": 3.804421901702881, "learning_rate": 8.15257737202872e-06, "loss": 0.7808, "step": 5777 }, { "epoch": 1.5939310344827586, "grad_norm": 3.6219894886016846, "learning_rate": 8.151869214388934e-06, "loss": 0.7976, "step": 5778 }, { "epoch": 1.5942068965517242, "grad_norm": 3.739591360092163, "learning_rate": 8.151160951818458e-06, "loss": 0.7278, "step": 5779 }, { "epoch": 1.5944827586206896, "grad_norm": 4.040210247039795, "learning_rate": 8.15045258434087e-06, "loss": 0.879, "step": 5780 }, { "epoch": 1.5947586206896551, "grad_norm": 4.2176618576049805, "learning_rate": 8.149744111979758e-06, "loss": 0.8233, "step": 5781 }, { "epoch": 1.5950344827586207, "grad_norm": 4.191851615905762, "learning_rate": 8.149035534758705e-06, "loss": 0.7965, "step": 5782 }, { "epoch": 1.595310344827586, "grad_norm": 3.9646170139312744, "learning_rate": 8.148326852701298e-06, "loss": 0.8219, "step": 5783 }, { "epoch": 1.5955862068965518, "grad_norm": 4.226274490356445, "learning_rate": 8.147618065831134e-06, "loss": 0.7533, "step": 5784 }, { "epoch": 1.5958620689655172, "grad_norm": 4.226863861083984, "learning_rate": 8.146909174171808e-06, "loss": 0.6916, "step": 5785 }, { "epoch": 1.5961379310344828, "grad_norm": 4.108553886413574, "learning_rate": 8.14620017774692e-06, "loss": 0.9895, "step": 5786 }, { "epoch": 1.5964137931034483, "grad_norm": 4.301424026489258, "learning_rate": 8.145491076580075e-06, "loss": 0.8467, "step": 5787 }, { "epoch": 1.5966896551724137, "grad_norm": 4.128389835357666, "learning_rate": 8.144781870694877e-06, "loss": 0.7731, "step": 5788 }, { "epoch": 1.5969655172413795, "grad_norm": 3.603656053543091, "learning_rate": 8.14407256011494e-06, "loss": 0.7031, "step": 5789 }, { "epoch": 1.5972413793103448, "grad_norm": 4.358375549316406, "learning_rate": 8.143363144863875e-06, "loss": 0.9053, "step": 5790 }, { "epoch": 1.5975172413793104, "grad_norm": 3.838207244873047, "learning_rate": 8.142653624965304e-06, "loss": 0.8433, "step": 5791 }, { "epoch": 1.597793103448276, "grad_norm": 3.727309226989746, "learning_rate": 8.141944000442844e-06, "loss": 0.7503, "step": 5792 }, { "epoch": 1.5980689655172413, "grad_norm": 3.724647045135498, "learning_rate": 8.14123427132012e-06, "loss": 0.7994, "step": 5793 }, { "epoch": 1.5983448275862069, "grad_norm": 3.8313443660736084, "learning_rate": 8.14052443762076e-06, "loss": 0.672, "step": 5794 }, { "epoch": 1.5986206896551725, "grad_norm": 3.916922092437744, "learning_rate": 8.139814499368399e-06, "loss": 0.8707, "step": 5795 }, { "epoch": 1.5988965517241378, "grad_norm": 4.0880818367004395, "learning_rate": 8.139104456586667e-06, "loss": 0.8594, "step": 5796 }, { "epoch": 1.5991724137931036, "grad_norm": 4.0939812660217285, "learning_rate": 8.138394309299205e-06, "loss": 0.7118, "step": 5797 }, { "epoch": 1.599448275862069, "grad_norm": 4.187185287475586, "learning_rate": 8.137684057529656e-06, "loss": 0.7486, "step": 5798 }, { "epoch": 1.5997241379310345, "grad_norm": 4.193525791168213, "learning_rate": 8.136973701301662e-06, "loss": 0.8931, "step": 5799 }, { "epoch": 1.6, "grad_norm": 4.055092811584473, "learning_rate": 8.136263240638875e-06, "loss": 0.8334, "step": 5800 }, { "epoch": 1.6002758620689654, "grad_norm": 4.034084796905518, "learning_rate": 8.135552675564946e-06, "loss": 0.7941, "step": 5801 }, { "epoch": 1.600551724137931, "grad_norm": 4.067031383514404, "learning_rate": 8.13484200610353e-06, "loss": 0.8871, "step": 5802 }, { "epoch": 1.6008275862068966, "grad_norm": 4.066622257232666, "learning_rate": 8.13413123227829e-06, "loss": 0.8094, "step": 5803 }, { "epoch": 1.601103448275862, "grad_norm": 4.319196701049805, "learning_rate": 8.133420354112885e-06, "loss": 0.9498, "step": 5804 }, { "epoch": 1.6013793103448277, "grad_norm": 4.316298484802246, "learning_rate": 8.13270937163098e-06, "loss": 0.8031, "step": 5805 }, { "epoch": 1.601655172413793, "grad_norm": 4.029331207275391, "learning_rate": 8.131998284856249e-06, "loss": 0.8595, "step": 5806 }, { "epoch": 1.6019310344827586, "grad_norm": 4.112841606140137, "learning_rate": 8.131287093812364e-06, "loss": 0.9655, "step": 5807 }, { "epoch": 1.6022068965517242, "grad_norm": 4.0681328773498535, "learning_rate": 8.130575798522999e-06, "loss": 0.787, "step": 5808 }, { "epoch": 1.6024827586206896, "grad_norm": 3.770829677581787, "learning_rate": 8.129864399011837e-06, "loss": 0.8061, "step": 5809 }, { "epoch": 1.6027586206896551, "grad_norm": 3.9572479724884033, "learning_rate": 8.129152895302562e-06, "loss": 0.81, "step": 5810 }, { "epoch": 1.6030344827586207, "grad_norm": 4.356111526489258, "learning_rate": 8.128441287418859e-06, "loss": 0.7541, "step": 5811 }, { "epoch": 1.603310344827586, "grad_norm": 4.09697961807251, "learning_rate": 8.12772957538442e-06, "loss": 0.7956, "step": 5812 }, { "epoch": 1.6035862068965518, "grad_norm": 3.849726676940918, "learning_rate": 8.127017759222937e-06, "loss": 0.7557, "step": 5813 }, { "epoch": 1.6038620689655172, "grad_norm": 3.7385175228118896, "learning_rate": 8.126305838958108e-06, "loss": 0.9257, "step": 5814 }, { "epoch": 1.6041379310344828, "grad_norm": 3.574589967727661, "learning_rate": 8.125593814613637e-06, "loss": 0.7189, "step": 5815 }, { "epoch": 1.6044137931034483, "grad_norm": 3.8975672721862793, "learning_rate": 8.124881686213226e-06, "loss": 0.8416, "step": 5816 }, { "epoch": 1.6046896551724137, "grad_norm": 3.707829236984253, "learning_rate": 8.12416945378058e-06, "loss": 0.7021, "step": 5817 }, { "epoch": 1.6049655172413793, "grad_norm": 3.6336586475372314, "learning_rate": 8.123457117339417e-06, "loss": 0.729, "step": 5818 }, { "epoch": 1.6052413793103448, "grad_norm": 4.004858016967773, "learning_rate": 8.122744676913447e-06, "loss": 0.698, "step": 5819 }, { "epoch": 1.6055172413793104, "grad_norm": 4.067915916442871, "learning_rate": 8.122032132526389e-06, "loss": 0.7849, "step": 5820 }, { "epoch": 1.605793103448276, "grad_norm": 4.181692123413086, "learning_rate": 8.121319484201966e-06, "loss": 0.9225, "step": 5821 }, { "epoch": 1.6060689655172413, "grad_norm": 4.376669406890869, "learning_rate": 8.120606731963902e-06, "loss": 0.9661, "step": 5822 }, { "epoch": 1.606344827586207, "grad_norm": 3.939893960952759, "learning_rate": 8.119893875835927e-06, "loss": 0.8255, "step": 5823 }, { "epoch": 1.6066206896551725, "grad_norm": 4.062033653259277, "learning_rate": 8.119180915841772e-06, "loss": 0.7129, "step": 5824 }, { "epoch": 1.6068965517241378, "grad_norm": 4.279183387756348, "learning_rate": 8.118467852005171e-06, "loss": 0.9, "step": 5825 }, { "epoch": 1.6071724137931036, "grad_norm": 3.968843936920166, "learning_rate": 8.117754684349866e-06, "loss": 0.7883, "step": 5826 }, { "epoch": 1.607448275862069, "grad_norm": 4.097337245941162, "learning_rate": 8.117041412899598e-06, "loss": 0.9396, "step": 5827 }, { "epoch": 1.6077241379310345, "grad_norm": 3.65810489654541, "learning_rate": 8.116328037678115e-06, "loss": 0.6747, "step": 5828 }, { "epoch": 1.608, "grad_norm": 3.769225835800171, "learning_rate": 8.11561455870916e-06, "loss": 0.8374, "step": 5829 }, { "epoch": 1.6082758620689654, "grad_norm": 4.042375087738037, "learning_rate": 8.114900976016493e-06, "loss": 0.8966, "step": 5830 }, { "epoch": 1.608551724137931, "grad_norm": 3.6075150966644287, "learning_rate": 8.114187289623867e-06, "loss": 0.727, "step": 5831 }, { "epoch": 1.6088275862068966, "grad_norm": 3.523705005645752, "learning_rate": 8.113473499555044e-06, "loss": 0.7645, "step": 5832 }, { "epoch": 1.609103448275862, "grad_norm": 4.146546363830566, "learning_rate": 8.112759605833784e-06, "loss": 0.8081, "step": 5833 }, { "epoch": 1.6093793103448277, "grad_norm": 3.4857165813446045, "learning_rate": 8.112045608483856e-06, "loss": 0.7885, "step": 5834 }, { "epoch": 1.609655172413793, "grad_norm": 4.122952461242676, "learning_rate": 8.11133150752903e-06, "loss": 0.8521, "step": 5835 }, { "epoch": 1.6099310344827586, "grad_norm": 3.9658563137054443, "learning_rate": 8.110617302993075e-06, "loss": 0.8727, "step": 5836 }, { "epoch": 1.6102068965517242, "grad_norm": 4.035981178283691, "learning_rate": 8.109902994899776e-06, "loss": 0.7473, "step": 5837 }, { "epoch": 1.6104827586206896, "grad_norm": 4.353499412536621, "learning_rate": 8.109188583272909e-06, "loss": 0.9579, "step": 5838 }, { "epoch": 1.6107586206896551, "grad_norm": 3.677076816558838, "learning_rate": 8.108474068136256e-06, "loss": 0.7615, "step": 5839 }, { "epoch": 1.6110344827586207, "grad_norm": 3.920076847076416, "learning_rate": 8.107759449513609e-06, "loss": 0.895, "step": 5840 }, { "epoch": 1.611310344827586, "grad_norm": 3.668203115463257, "learning_rate": 8.107044727428755e-06, "loss": 0.8112, "step": 5841 }, { "epoch": 1.6115862068965519, "grad_norm": 3.678715705871582, "learning_rate": 8.106329901905488e-06, "loss": 0.7655, "step": 5842 }, { "epoch": 1.6118620689655172, "grad_norm": 3.7497735023498535, "learning_rate": 8.105614972967611e-06, "loss": 0.7105, "step": 5843 }, { "epoch": 1.6121379310344828, "grad_norm": 3.794459819793701, "learning_rate": 8.104899940638917e-06, "loss": 0.8113, "step": 5844 }, { "epoch": 1.6124137931034483, "grad_norm": 3.830256462097168, "learning_rate": 8.104184804943217e-06, "loss": 0.7676, "step": 5845 }, { "epoch": 1.6126896551724137, "grad_norm": 3.985624313354492, "learning_rate": 8.103469565904316e-06, "loss": 0.9507, "step": 5846 }, { "epoch": 1.6129655172413793, "grad_norm": 3.611306667327881, "learning_rate": 8.102754223546027e-06, "loss": 0.8007, "step": 5847 }, { "epoch": 1.6132413793103448, "grad_norm": 3.9101955890655518, "learning_rate": 8.102038777892165e-06, "loss": 0.9204, "step": 5848 }, { "epoch": 1.6135172413793102, "grad_norm": 3.9225456714630127, "learning_rate": 8.101323228966547e-06, "loss": 0.8475, "step": 5849 }, { "epoch": 1.613793103448276, "grad_norm": 4.312676906585693, "learning_rate": 8.100607576792993e-06, "loss": 0.7296, "step": 5850 }, { "epoch": 1.6140689655172413, "grad_norm": 3.743039846420288, "learning_rate": 8.099891821395334e-06, "loss": 0.7507, "step": 5851 }, { "epoch": 1.614344827586207, "grad_norm": 4.007782936096191, "learning_rate": 8.099175962797393e-06, "loss": 0.7939, "step": 5852 }, { "epoch": 1.6146206896551725, "grad_norm": 3.5677266120910645, "learning_rate": 8.098460001023003e-06, "loss": 0.6618, "step": 5853 }, { "epoch": 1.6148965517241378, "grad_norm": 4.243386268615723, "learning_rate": 8.097743936096002e-06, "loss": 1.0, "step": 5854 }, { "epoch": 1.6151724137931036, "grad_norm": 3.7429616451263428, "learning_rate": 8.097027768040228e-06, "loss": 0.7722, "step": 5855 }, { "epoch": 1.615448275862069, "grad_norm": 3.7832751274108887, "learning_rate": 8.096311496879523e-06, "loss": 0.8634, "step": 5856 }, { "epoch": 1.6157241379310345, "grad_norm": 3.472242593765259, "learning_rate": 8.09559512263773e-06, "loss": 0.7603, "step": 5857 }, { "epoch": 1.616, "grad_norm": 4.319008827209473, "learning_rate": 8.094878645338703e-06, "loss": 0.8243, "step": 5858 }, { "epoch": 1.6162758620689655, "grad_norm": 3.680588960647583, "learning_rate": 8.094162065006293e-06, "loss": 0.8366, "step": 5859 }, { "epoch": 1.616551724137931, "grad_norm": 4.592522621154785, "learning_rate": 8.093445381664355e-06, "loss": 0.9273, "step": 5860 }, { "epoch": 1.6168275862068966, "grad_norm": 4.065406799316406, "learning_rate": 8.092728595336749e-06, "loss": 0.7905, "step": 5861 }, { "epoch": 1.617103448275862, "grad_norm": 4.062967777252197, "learning_rate": 8.092011706047339e-06, "loss": 0.8509, "step": 5862 }, { "epoch": 1.6173793103448277, "grad_norm": 3.4767444133758545, "learning_rate": 8.091294713819988e-06, "loss": 0.7211, "step": 5863 }, { "epoch": 1.617655172413793, "grad_norm": 3.952300786972046, "learning_rate": 8.09057761867857e-06, "loss": 0.8285, "step": 5864 }, { "epoch": 1.6179310344827587, "grad_norm": 3.548905849456787, "learning_rate": 8.089860420646957e-06, "loss": 0.7535, "step": 5865 }, { "epoch": 1.6182068965517242, "grad_norm": 5.12382173538208, "learning_rate": 8.089143119749025e-06, "loss": 0.8656, "step": 5866 }, { "epoch": 1.6184827586206896, "grad_norm": 3.3876397609710693, "learning_rate": 8.088425716008653e-06, "loss": 0.8044, "step": 5867 }, { "epoch": 1.6187586206896551, "grad_norm": 3.685258626937866, "learning_rate": 8.087708209449727e-06, "loss": 0.7081, "step": 5868 }, { "epoch": 1.6190344827586207, "grad_norm": 4.185078144073486, "learning_rate": 8.086990600096131e-06, "loss": 0.9039, "step": 5869 }, { "epoch": 1.619310344827586, "grad_norm": 4.333472728729248, "learning_rate": 8.08627288797176e-06, "loss": 0.8308, "step": 5870 }, { "epoch": 1.6195862068965519, "grad_norm": 3.8600873947143555, "learning_rate": 8.085555073100501e-06, "loss": 0.9482, "step": 5871 }, { "epoch": 1.6198620689655172, "grad_norm": 4.3959503173828125, "learning_rate": 8.084837155506257e-06, "loss": 0.9048, "step": 5872 }, { "epoch": 1.6201379310344828, "grad_norm": 4.4280171394348145, "learning_rate": 8.084119135212926e-06, "loss": 0.7473, "step": 5873 }, { "epoch": 1.6204137931034484, "grad_norm": 3.9434003829956055, "learning_rate": 8.083401012244414e-06, "loss": 0.7822, "step": 5874 }, { "epoch": 1.6206896551724137, "grad_norm": 3.9254469871520996, "learning_rate": 8.082682786624626e-06, "loss": 0.7358, "step": 5875 }, { "epoch": 1.6209655172413793, "grad_norm": 4.285278797149658, "learning_rate": 8.081964458377475e-06, "loss": 0.9289, "step": 5876 }, { "epoch": 1.6212413793103448, "grad_norm": 4.107844829559326, "learning_rate": 8.081246027526872e-06, "loss": 0.7982, "step": 5877 }, { "epoch": 1.6215172413793102, "grad_norm": 3.514314651489258, "learning_rate": 8.08052749409674e-06, "loss": 0.6804, "step": 5878 }, { "epoch": 1.621793103448276, "grad_norm": 4.293619155883789, "learning_rate": 8.079808858110993e-06, "loss": 0.7933, "step": 5879 }, { "epoch": 1.6220689655172413, "grad_norm": 3.785681962966919, "learning_rate": 8.079090119593562e-06, "loss": 0.8328, "step": 5880 }, { "epoch": 1.622344827586207, "grad_norm": 3.674544334411621, "learning_rate": 8.078371278568372e-06, "loss": 0.7633, "step": 5881 }, { "epoch": 1.6226206896551725, "grad_norm": 3.9979991912841797, "learning_rate": 8.077652335059355e-06, "loss": 0.8702, "step": 5882 }, { "epoch": 1.6228965517241378, "grad_norm": 3.9954800605773926, "learning_rate": 8.076933289090445e-06, "loss": 0.7323, "step": 5883 }, { "epoch": 1.6231724137931034, "grad_norm": 4.081053733825684, "learning_rate": 8.076214140685582e-06, "loss": 0.9396, "step": 5884 }, { "epoch": 1.623448275862069, "grad_norm": 3.9659738540649414, "learning_rate": 8.075494889868704e-06, "loss": 0.8794, "step": 5885 }, { "epoch": 1.6237241379310343, "grad_norm": 4.026506423950195, "learning_rate": 8.074775536663759e-06, "loss": 0.8087, "step": 5886 }, { "epoch": 1.624, "grad_norm": 3.958876609802246, "learning_rate": 8.074056081094696e-06, "loss": 0.9133, "step": 5887 }, { "epoch": 1.6242758620689655, "grad_norm": 3.829782485961914, "learning_rate": 8.073336523185465e-06, "loss": 0.7394, "step": 5888 }, { "epoch": 1.624551724137931, "grad_norm": 4.298987865447998, "learning_rate": 8.072616862960021e-06, "loss": 0.8665, "step": 5889 }, { "epoch": 1.6248275862068966, "grad_norm": 3.6581614017486572, "learning_rate": 8.071897100442324e-06, "loss": 0.7522, "step": 5890 }, { "epoch": 1.625103448275862, "grad_norm": 4.105040073394775, "learning_rate": 8.071177235656335e-06, "loss": 0.8122, "step": 5891 }, { "epoch": 1.6253793103448277, "grad_norm": 4.149657726287842, "learning_rate": 8.070457268626019e-06, "loss": 0.9437, "step": 5892 }, { "epoch": 1.625655172413793, "grad_norm": 4.273311614990234, "learning_rate": 8.069737199375347e-06, "loss": 0.9343, "step": 5893 }, { "epoch": 1.6259310344827587, "grad_norm": 3.8624045848846436, "learning_rate": 8.069017027928289e-06, "loss": 0.7838, "step": 5894 }, { "epoch": 1.6262068965517242, "grad_norm": 3.818859815597534, "learning_rate": 8.068296754308822e-06, "loss": 0.9057, "step": 5895 }, { "epoch": 1.6264827586206896, "grad_norm": 3.3642477989196777, "learning_rate": 8.067576378540925e-06, "loss": 0.7312, "step": 5896 }, { "epoch": 1.6267586206896552, "grad_norm": 3.5492053031921387, "learning_rate": 8.06685590064858e-06, "loss": 0.8165, "step": 5897 }, { "epoch": 1.6270344827586207, "grad_norm": 3.870769739151001, "learning_rate": 8.066135320655772e-06, "loss": 0.9112, "step": 5898 }, { "epoch": 1.627310344827586, "grad_norm": 4.054060935974121, "learning_rate": 8.065414638586491e-06, "loss": 0.9385, "step": 5899 }, { "epoch": 1.6275862068965519, "grad_norm": 3.7210490703582764, "learning_rate": 8.064693854464731e-06, "loss": 0.7617, "step": 5900 }, { "epoch": 1.6278620689655172, "grad_norm": 3.9776952266693115, "learning_rate": 8.063972968314487e-06, "loss": 0.8218, "step": 5901 }, { "epoch": 1.6281379310344828, "grad_norm": 3.995602607727051, "learning_rate": 8.063251980159756e-06, "loss": 0.8415, "step": 5902 }, { "epoch": 1.6284137931034484, "grad_norm": 4.221772193908691, "learning_rate": 8.062530890024545e-06, "loss": 0.7749, "step": 5903 }, { "epoch": 1.6286896551724137, "grad_norm": 3.864971399307251, "learning_rate": 8.061809697932856e-06, "loss": 0.8706, "step": 5904 }, { "epoch": 1.6289655172413793, "grad_norm": 4.7748823165893555, "learning_rate": 8.061088403908703e-06, "loss": 0.7887, "step": 5905 }, { "epoch": 1.6292413793103449, "grad_norm": 4.121566295623779, "learning_rate": 8.060367007976096e-06, "loss": 0.8179, "step": 5906 }, { "epoch": 1.6295172413793102, "grad_norm": 4.2198309898376465, "learning_rate": 8.059645510159053e-06, "loss": 0.8212, "step": 5907 }, { "epoch": 1.629793103448276, "grad_norm": 3.9294309616088867, "learning_rate": 8.058923910481591e-06, "loss": 0.6926, "step": 5908 }, { "epoch": 1.6300689655172413, "grad_norm": 4.0593743324279785, "learning_rate": 8.058202208967738e-06, "loss": 0.8821, "step": 5909 }, { "epoch": 1.630344827586207, "grad_norm": 4.175160884857178, "learning_rate": 8.057480405641516e-06, "loss": 0.9546, "step": 5910 }, { "epoch": 1.6306206896551725, "grad_norm": 4.330799102783203, "learning_rate": 8.056758500526958e-06, "loss": 0.8098, "step": 5911 }, { "epoch": 1.6308965517241378, "grad_norm": 4.272992134094238, "learning_rate": 8.056036493648097e-06, "loss": 0.856, "step": 5912 }, { "epoch": 1.6311724137931034, "grad_norm": 3.86541748046875, "learning_rate": 8.055314385028967e-06, "loss": 0.9044, "step": 5913 }, { "epoch": 1.631448275862069, "grad_norm": 3.7132842540740967, "learning_rate": 8.054592174693609e-06, "loss": 0.7112, "step": 5914 }, { "epoch": 1.6317241379310343, "grad_norm": 4.332250118255615, "learning_rate": 8.05386986266607e-06, "loss": 0.9419, "step": 5915 }, { "epoch": 1.6320000000000001, "grad_norm": 3.8752119541168213, "learning_rate": 8.053147448970396e-06, "loss": 0.6602, "step": 5916 }, { "epoch": 1.6322758620689655, "grad_norm": 4.210239410400391, "learning_rate": 8.052424933630635e-06, "loss": 0.9521, "step": 5917 }, { "epoch": 1.632551724137931, "grad_norm": 3.9785444736480713, "learning_rate": 8.051702316670838e-06, "loss": 0.9235, "step": 5918 }, { "epoch": 1.6328275862068966, "grad_norm": 4.625126838684082, "learning_rate": 8.050979598115067e-06, "loss": 0.9618, "step": 5919 }, { "epoch": 1.633103448275862, "grad_norm": 3.974838972091675, "learning_rate": 8.050256777987381e-06, "loss": 0.8077, "step": 5920 }, { "epoch": 1.6333793103448278, "grad_norm": 3.8034749031066895, "learning_rate": 8.049533856311844e-06, "loss": 0.8149, "step": 5921 }, { "epoch": 1.633655172413793, "grad_norm": 4.179654121398926, "learning_rate": 8.048810833112526e-06, "loss": 0.8458, "step": 5922 }, { "epoch": 1.6339310344827587, "grad_norm": 3.9313127994537354, "learning_rate": 8.04808770841349e-06, "loss": 0.8159, "step": 5923 }, { "epoch": 1.6342068965517242, "grad_norm": 3.8833553791046143, "learning_rate": 8.047364482238818e-06, "loss": 0.7631, "step": 5924 }, { "epoch": 1.6344827586206896, "grad_norm": 4.165414810180664, "learning_rate": 8.04664115461258e-06, "loss": 0.8181, "step": 5925 }, { "epoch": 1.6347586206896552, "grad_norm": 3.5228271484375, "learning_rate": 8.045917725558865e-06, "loss": 0.7181, "step": 5926 }, { "epoch": 1.6350344827586207, "grad_norm": 3.9478015899658203, "learning_rate": 8.04519419510175e-06, "loss": 0.7964, "step": 5927 }, { "epoch": 1.635310344827586, "grad_norm": 4.021729946136475, "learning_rate": 8.044470563265324e-06, "loss": 0.8609, "step": 5928 }, { "epoch": 1.6355862068965519, "grad_norm": 4.302889823913574, "learning_rate": 8.043746830073682e-06, "loss": 0.899, "step": 5929 }, { "epoch": 1.6358620689655172, "grad_norm": 3.5714111328125, "learning_rate": 8.043022995550914e-06, "loss": 0.8653, "step": 5930 }, { "epoch": 1.6361379310344828, "grad_norm": 3.7361788749694824, "learning_rate": 8.042299059721118e-06, "loss": 0.8526, "step": 5931 }, { "epoch": 1.6364137931034484, "grad_norm": 4.113379001617432, "learning_rate": 8.041575022608396e-06, "loss": 0.7322, "step": 5932 }, { "epoch": 1.6366896551724137, "grad_norm": 4.203580856323242, "learning_rate": 8.040850884236853e-06, "loss": 0.8939, "step": 5933 }, { "epoch": 1.6369655172413793, "grad_norm": 4.176551342010498, "learning_rate": 8.040126644630597e-06, "loss": 0.9738, "step": 5934 }, { "epoch": 1.6372413793103449, "grad_norm": 3.391188144683838, "learning_rate": 8.039402303813735e-06, "loss": 0.8592, "step": 5935 }, { "epoch": 1.6375172413793102, "grad_norm": 3.768993616104126, "learning_rate": 8.038677861810386e-06, "loss": 0.7463, "step": 5936 }, { "epoch": 1.637793103448276, "grad_norm": 4.003043174743652, "learning_rate": 8.037953318644665e-06, "loss": 0.8813, "step": 5937 }, { "epoch": 1.6380689655172413, "grad_norm": 3.9670941829681396, "learning_rate": 8.037228674340695e-06, "loss": 0.9776, "step": 5938 }, { "epoch": 1.638344827586207, "grad_norm": 4.427738666534424, "learning_rate": 8.036503928922598e-06, "loss": 0.7808, "step": 5939 }, { "epoch": 1.6386206896551725, "grad_norm": 4.651776313781738, "learning_rate": 8.035779082414508e-06, "loss": 0.827, "step": 5940 }, { "epoch": 1.6388965517241378, "grad_norm": 4.210686206817627, "learning_rate": 8.035054134840548e-06, "loss": 0.8558, "step": 5941 }, { "epoch": 1.6391724137931034, "grad_norm": 3.8504116535186768, "learning_rate": 8.034329086224859e-06, "loss": 0.8357, "step": 5942 }, { "epoch": 1.639448275862069, "grad_norm": 4.043741226196289, "learning_rate": 8.033603936591576e-06, "loss": 0.8417, "step": 5943 }, { "epoch": 1.6397241379310343, "grad_norm": 3.9528512954711914, "learning_rate": 8.03287868596484e-06, "loss": 0.8895, "step": 5944 }, { "epoch": 1.6400000000000001, "grad_norm": 4.085151672363281, "learning_rate": 8.0321533343688e-06, "loss": 0.7118, "step": 5945 }, { "epoch": 1.6402758620689655, "grad_norm": 3.8703839778900146, "learning_rate": 8.031427881827597e-06, "loss": 0.8221, "step": 5946 }, { "epoch": 1.640551724137931, "grad_norm": 4.02365779876709, "learning_rate": 8.030702328365388e-06, "loss": 0.9228, "step": 5947 }, { "epoch": 1.6408275862068966, "grad_norm": 4.067877292633057, "learning_rate": 8.029976674006327e-06, "loss": 0.8902, "step": 5948 }, { "epoch": 1.641103448275862, "grad_norm": 3.9625039100646973, "learning_rate": 8.029250918774572e-06, "loss": 0.9263, "step": 5949 }, { "epoch": 1.6413793103448275, "grad_norm": 3.5414280891418457, "learning_rate": 8.028525062694282e-06, "loss": 0.6654, "step": 5950 }, { "epoch": 1.641655172413793, "grad_norm": 4.417990207672119, "learning_rate": 8.027799105789625e-06, "loss": 0.705, "step": 5951 }, { "epoch": 1.6419310344827585, "grad_norm": 3.850071668624878, "learning_rate": 8.027073048084765e-06, "loss": 0.9348, "step": 5952 }, { "epoch": 1.6422068965517242, "grad_norm": 4.3195414543151855, "learning_rate": 8.02634688960388e-06, "loss": 0.9726, "step": 5953 }, { "epoch": 1.6424827586206896, "grad_norm": 3.7039568424224854, "learning_rate": 8.025620630371141e-06, "loss": 0.7986, "step": 5954 }, { "epoch": 1.6427586206896552, "grad_norm": 3.9509568214416504, "learning_rate": 8.024894270410727e-06, "loss": 0.9689, "step": 5955 }, { "epoch": 1.6430344827586207, "grad_norm": 3.767953872680664, "learning_rate": 8.02416780974682e-06, "loss": 0.7617, "step": 5956 }, { "epoch": 1.643310344827586, "grad_norm": 3.9983861446380615, "learning_rate": 8.023441248403603e-06, "loss": 0.8004, "step": 5957 }, { "epoch": 1.6435862068965519, "grad_norm": 3.994919538497925, "learning_rate": 8.022714586405267e-06, "loss": 0.8334, "step": 5958 }, { "epoch": 1.6438620689655172, "grad_norm": 3.7090370655059814, "learning_rate": 8.021987823776002e-06, "loss": 0.736, "step": 5959 }, { "epoch": 1.6441379310344828, "grad_norm": 3.933326244354248, "learning_rate": 8.021260960540005e-06, "loss": 0.8517, "step": 5960 }, { "epoch": 1.6444137931034484, "grad_norm": 4.128543376922607, "learning_rate": 8.020533996721473e-06, "loss": 0.9699, "step": 5961 }, { "epoch": 1.6446896551724137, "grad_norm": 4.293280124664307, "learning_rate": 8.019806932344607e-06, "loss": 0.8324, "step": 5962 }, { "epoch": 1.6449655172413793, "grad_norm": 3.640435218811035, "learning_rate": 8.019079767433614e-06, "loss": 0.7146, "step": 5963 }, { "epoch": 1.6452413793103449, "grad_norm": 3.8159306049346924, "learning_rate": 8.0183525020127e-06, "loss": 0.8373, "step": 5964 }, { "epoch": 1.6455172413793102, "grad_norm": 3.764512777328491, "learning_rate": 8.017625136106081e-06, "loss": 0.8268, "step": 5965 }, { "epoch": 1.645793103448276, "grad_norm": 3.8213768005371094, "learning_rate": 8.016897669737967e-06, "loss": 0.7872, "step": 5966 }, { "epoch": 1.6460689655172414, "grad_norm": 4.423281669616699, "learning_rate": 8.016170102932581e-06, "loss": 0.933, "step": 5967 }, { "epoch": 1.646344827586207, "grad_norm": 4.198904514312744, "learning_rate": 8.015442435714142e-06, "loss": 0.832, "step": 5968 }, { "epoch": 1.6466206896551725, "grad_norm": 3.796830177307129, "learning_rate": 8.014714668106876e-06, "loss": 0.8283, "step": 5969 }, { "epoch": 1.6468965517241378, "grad_norm": 4.0332112312316895, "learning_rate": 8.013986800135011e-06, "loss": 0.8302, "step": 5970 }, { "epoch": 1.6471724137931034, "grad_norm": 3.4565396308898926, "learning_rate": 8.013258831822782e-06, "loss": 0.7138, "step": 5971 }, { "epoch": 1.647448275862069, "grad_norm": 3.464956283569336, "learning_rate": 8.012530763194418e-06, "loss": 0.6693, "step": 5972 }, { "epoch": 1.6477241379310343, "grad_norm": 3.9597668647766113, "learning_rate": 8.011802594274163e-06, "loss": 0.8319, "step": 5973 }, { "epoch": 1.6480000000000001, "grad_norm": 3.662728786468506, "learning_rate": 8.011074325086257e-06, "loss": 0.8042, "step": 5974 }, { "epoch": 1.6482758620689655, "grad_norm": 3.621633768081665, "learning_rate": 8.010345955654945e-06, "loss": 0.7749, "step": 5975 }, { "epoch": 1.648551724137931, "grad_norm": 3.8943254947662354, "learning_rate": 8.009617486004476e-06, "loss": 0.7625, "step": 5976 }, { "epoch": 1.6488275862068966, "grad_norm": 3.928285598754883, "learning_rate": 8.008888916159102e-06, "loss": 0.9603, "step": 5977 }, { "epoch": 1.649103448275862, "grad_norm": 4.057824611663818, "learning_rate": 8.008160246143078e-06, "loss": 0.8086, "step": 5978 }, { "epoch": 1.6493793103448275, "grad_norm": 3.8435299396514893, "learning_rate": 8.007431475980663e-06, "loss": 0.7276, "step": 5979 }, { "epoch": 1.6496551724137931, "grad_norm": 3.9854347705841064, "learning_rate": 8.006702605696118e-06, "loss": 0.8057, "step": 5980 }, { "epoch": 1.6499310344827585, "grad_norm": 4.313213348388672, "learning_rate": 8.005973635313708e-06, "loss": 0.8236, "step": 5981 }, { "epoch": 1.6502068965517243, "grad_norm": 3.526573419570923, "learning_rate": 8.005244564857705e-06, "loss": 0.772, "step": 5982 }, { "epoch": 1.6504827586206896, "grad_norm": 4.171314239501953, "learning_rate": 8.004515394352377e-06, "loss": 0.8801, "step": 5983 }, { "epoch": 1.6507586206896552, "grad_norm": 4.249690532684326, "learning_rate": 8.003786123822e-06, "loss": 0.9518, "step": 5984 }, { "epoch": 1.6510344827586207, "grad_norm": 4.101199150085449, "learning_rate": 8.003056753290851e-06, "loss": 0.8153, "step": 5985 }, { "epoch": 1.651310344827586, "grad_norm": 3.966266632080078, "learning_rate": 8.002327282783218e-06, "loss": 0.8698, "step": 5986 }, { "epoch": 1.651586206896552, "grad_norm": 3.991783857345581, "learning_rate": 8.001597712323378e-06, "loss": 0.8161, "step": 5987 }, { "epoch": 1.6518620689655172, "grad_norm": 3.6102123260498047, "learning_rate": 8.000868041935626e-06, "loss": 0.8098, "step": 5988 }, { "epoch": 1.6521379310344828, "grad_norm": 4.365334510803223, "learning_rate": 8.000138271644251e-06, "loss": 0.9192, "step": 5989 }, { "epoch": 1.6524137931034484, "grad_norm": 3.7811646461486816, "learning_rate": 7.999408401473551e-06, "loss": 0.7475, "step": 5990 }, { "epoch": 1.6526896551724137, "grad_norm": 3.659959316253662, "learning_rate": 7.998678431447819e-06, "loss": 0.8176, "step": 5991 }, { "epoch": 1.6529655172413793, "grad_norm": 3.851341485977173, "learning_rate": 7.997948361591362e-06, "loss": 0.7561, "step": 5992 }, { "epoch": 1.6532413793103449, "grad_norm": 3.7759528160095215, "learning_rate": 7.997218191928482e-06, "loss": 0.8196, "step": 5993 }, { "epoch": 1.6535172413793102, "grad_norm": 4.299834728240967, "learning_rate": 7.996487922483491e-06, "loss": 1.0252, "step": 5994 }, { "epoch": 1.653793103448276, "grad_norm": 4.272045612335205, "learning_rate": 7.995757553280696e-06, "loss": 0.9055, "step": 5995 }, { "epoch": 1.6540689655172414, "grad_norm": 3.8312718868255615, "learning_rate": 7.995027084344418e-06, "loss": 0.7415, "step": 5996 }, { "epoch": 1.654344827586207, "grad_norm": 3.8539865016937256, "learning_rate": 7.994296515698969e-06, "loss": 0.8397, "step": 5997 }, { "epoch": 1.6546206896551725, "grad_norm": 3.987495183944702, "learning_rate": 7.993565847368676e-06, "loss": 0.765, "step": 5998 }, { "epoch": 1.6548965517241379, "grad_norm": 3.8456480503082275, "learning_rate": 7.992835079377861e-06, "loss": 0.7331, "step": 5999 }, { "epoch": 1.6551724137931034, "grad_norm": 3.9319725036621094, "learning_rate": 7.992104211750856e-06, "loss": 0.7659, "step": 6000 }, { "epoch": 1.6551724137931034, "eval_loss": 1.2673325538635254, "eval_runtime": 13.7883, "eval_samples_per_second": 29.01, "eval_steps_per_second": 3.626, "step": 6000 }, { "epoch": 1.655448275862069, "grad_norm": 3.9430177211761475, "learning_rate": 7.991373244511987e-06, "loss": 0.8782, "step": 6001 }, { "epoch": 1.6557241379310343, "grad_norm": 3.869615316390991, "learning_rate": 7.990642177685592e-06, "loss": 0.8693, "step": 6002 }, { "epoch": 1.6560000000000001, "grad_norm": 3.872520923614502, "learning_rate": 7.989911011296012e-06, "loss": 0.7256, "step": 6003 }, { "epoch": 1.6562758620689655, "grad_norm": 4.462812900543213, "learning_rate": 7.989179745367586e-06, "loss": 0.6891, "step": 6004 }, { "epoch": 1.656551724137931, "grad_norm": 3.889986038208008, "learning_rate": 7.988448379924661e-06, "loss": 0.8928, "step": 6005 }, { "epoch": 1.6568275862068966, "grad_norm": 3.903555154800415, "learning_rate": 7.98771691499158e-06, "loss": 0.7244, "step": 6006 }, { "epoch": 1.657103448275862, "grad_norm": 3.494884490966797, "learning_rate": 7.986985350592699e-06, "loss": 0.7325, "step": 6007 }, { "epoch": 1.6573793103448275, "grad_norm": 4.057406425476074, "learning_rate": 7.986253686752374e-06, "loss": 0.8296, "step": 6008 }, { "epoch": 1.6576551724137931, "grad_norm": 3.6803951263427734, "learning_rate": 7.985521923494961e-06, "loss": 0.7942, "step": 6009 }, { "epoch": 1.6579310344827585, "grad_norm": 3.792607307434082, "learning_rate": 7.984790060844822e-06, "loss": 0.8428, "step": 6010 }, { "epoch": 1.6582068965517243, "grad_norm": 3.9627633094787598, "learning_rate": 7.984058098826321e-06, "loss": 0.8333, "step": 6011 }, { "epoch": 1.6584827586206896, "grad_norm": 4.205012321472168, "learning_rate": 7.983326037463828e-06, "loss": 0.8457, "step": 6012 }, { "epoch": 1.6587586206896552, "grad_norm": 3.701998710632324, "learning_rate": 7.982593876781715e-06, "loss": 0.7671, "step": 6013 }, { "epoch": 1.6590344827586208, "grad_norm": 3.9787795543670654, "learning_rate": 7.981861616804351e-06, "loss": 0.8392, "step": 6014 }, { "epoch": 1.659310344827586, "grad_norm": 3.8793838024139404, "learning_rate": 7.981129257556122e-06, "loss": 0.8514, "step": 6015 }, { "epoch": 1.6595862068965517, "grad_norm": 3.939786434173584, "learning_rate": 7.980396799061405e-06, "loss": 0.7408, "step": 6016 }, { "epoch": 1.6598620689655172, "grad_norm": 3.8831472396850586, "learning_rate": 7.979664241344585e-06, "loss": 0.8922, "step": 6017 }, { "epoch": 1.6601379310344826, "grad_norm": 3.6049981117248535, "learning_rate": 7.97893158443005e-06, "loss": 0.6621, "step": 6018 }, { "epoch": 1.6604137931034484, "grad_norm": 3.614551067352295, "learning_rate": 7.978198828342193e-06, "loss": 0.7159, "step": 6019 }, { "epoch": 1.6606896551724137, "grad_norm": 4.06843900680542, "learning_rate": 7.977465973105408e-06, "loss": 0.8891, "step": 6020 }, { "epoch": 1.6609655172413793, "grad_norm": 4.251852035522461, "learning_rate": 7.97673301874409e-06, "loss": 0.9041, "step": 6021 }, { "epoch": 1.6612413793103449, "grad_norm": 3.8645787239074707, "learning_rate": 7.975999965282643e-06, "loss": 0.7872, "step": 6022 }, { "epoch": 1.6615172413793102, "grad_norm": 4.408511638641357, "learning_rate": 7.975266812745472e-06, "loss": 0.9116, "step": 6023 }, { "epoch": 1.661793103448276, "grad_norm": 3.3304483890533447, "learning_rate": 7.974533561156983e-06, "loss": 0.7574, "step": 6024 }, { "epoch": 1.6620689655172414, "grad_norm": 4.395424842834473, "learning_rate": 7.973800210541589e-06, "loss": 0.7839, "step": 6025 }, { "epoch": 1.662344827586207, "grad_norm": 4.107906818389893, "learning_rate": 7.973066760923702e-06, "loss": 0.819, "step": 6026 }, { "epoch": 1.6626206896551725, "grad_norm": 3.8928725719451904, "learning_rate": 7.972333212327743e-06, "loss": 0.8983, "step": 6027 }, { "epoch": 1.6628965517241379, "grad_norm": 3.9821503162384033, "learning_rate": 7.971599564778129e-06, "loss": 0.7643, "step": 6028 }, { "epoch": 1.6631724137931034, "grad_norm": 4.614604473114014, "learning_rate": 7.970865818299287e-06, "loss": 0.8411, "step": 6029 }, { "epoch": 1.663448275862069, "grad_norm": 4.406060695648193, "learning_rate": 7.970131972915643e-06, "loss": 0.9061, "step": 6030 }, { "epoch": 1.6637241379310344, "grad_norm": 3.664031982421875, "learning_rate": 7.96939802865163e-06, "loss": 0.8762, "step": 6031 }, { "epoch": 1.6640000000000001, "grad_norm": 3.886838674545288, "learning_rate": 7.96866398553168e-06, "loss": 0.7781, "step": 6032 }, { "epoch": 1.6642758620689655, "grad_norm": 4.136636734008789, "learning_rate": 7.96792984358023e-06, "loss": 0.7876, "step": 6033 }, { "epoch": 1.664551724137931, "grad_norm": 3.6436996459960938, "learning_rate": 7.967195602821726e-06, "loss": 0.7996, "step": 6034 }, { "epoch": 1.6648275862068966, "grad_norm": 4.025937080383301, "learning_rate": 7.966461263280606e-06, "loss": 0.8692, "step": 6035 }, { "epoch": 1.665103448275862, "grad_norm": 3.859097480773926, "learning_rate": 7.965726824981319e-06, "loss": 0.8527, "step": 6036 }, { "epoch": 1.6653793103448276, "grad_norm": 3.68485426902771, "learning_rate": 7.964992287948314e-06, "loss": 0.6936, "step": 6037 }, { "epoch": 1.6656551724137931, "grad_norm": 3.6632840633392334, "learning_rate": 7.96425765220605e-06, "loss": 0.7436, "step": 6038 }, { "epoch": 1.6659310344827585, "grad_norm": 4.297829627990723, "learning_rate": 7.963522917778982e-06, "loss": 0.7919, "step": 6039 }, { "epoch": 1.6662068965517243, "grad_norm": 4.093451023101807, "learning_rate": 7.962788084691567e-06, "loss": 0.8221, "step": 6040 }, { "epoch": 1.6664827586206896, "grad_norm": 3.321493625640869, "learning_rate": 7.962053152968271e-06, "loss": 0.7192, "step": 6041 }, { "epoch": 1.6667586206896552, "grad_norm": 3.967501640319824, "learning_rate": 7.961318122633563e-06, "loss": 0.8411, "step": 6042 }, { "epoch": 1.6670344827586208, "grad_norm": 3.6201388835906982, "learning_rate": 7.96058299371191e-06, "loss": 0.854, "step": 6043 }, { "epoch": 1.667310344827586, "grad_norm": 4.117356777191162, "learning_rate": 7.959847766227788e-06, "loss": 0.9063, "step": 6044 }, { "epoch": 1.6675862068965517, "grad_norm": 4.4685282707214355, "learning_rate": 7.959112440205675e-06, "loss": 0.7065, "step": 6045 }, { "epoch": 1.6678620689655173, "grad_norm": 3.2476885318756104, "learning_rate": 7.958377015670047e-06, "loss": 0.7925, "step": 6046 }, { "epoch": 1.6681379310344826, "grad_norm": 3.9503259658813477, "learning_rate": 7.95764149264539e-06, "loss": 0.7466, "step": 6047 }, { "epoch": 1.6684137931034484, "grad_norm": 4.780911445617676, "learning_rate": 7.95690587115619e-06, "loss": 0.9554, "step": 6048 }, { "epoch": 1.6686896551724137, "grad_norm": 3.94553279876709, "learning_rate": 7.956170151226938e-06, "loss": 0.7664, "step": 6049 }, { "epoch": 1.6689655172413793, "grad_norm": 3.821514368057251, "learning_rate": 7.955434332882129e-06, "loss": 0.705, "step": 6050 }, { "epoch": 1.6692413793103449, "grad_norm": 4.00225830078125, "learning_rate": 7.954698416146255e-06, "loss": 0.8256, "step": 6051 }, { "epoch": 1.6695172413793102, "grad_norm": 4.283660411834717, "learning_rate": 7.95396240104382e-06, "loss": 0.7937, "step": 6052 }, { "epoch": 1.669793103448276, "grad_norm": 3.970050096511841, "learning_rate": 7.953226287599322e-06, "loss": 0.9219, "step": 6053 }, { "epoch": 1.6700689655172414, "grad_norm": 3.7987613677978516, "learning_rate": 7.952490075837274e-06, "loss": 0.8876, "step": 6054 }, { "epoch": 1.670344827586207, "grad_norm": 3.922811269760132, "learning_rate": 7.95175376578218e-06, "loss": 0.7507, "step": 6055 }, { "epoch": 1.6706206896551725, "grad_norm": 4.079495429992676, "learning_rate": 7.951017357458555e-06, "loss": 0.8316, "step": 6056 }, { "epoch": 1.6708965517241379, "grad_norm": 4.201151371002197, "learning_rate": 7.950280850890917e-06, "loss": 0.8601, "step": 6057 }, { "epoch": 1.6711724137931034, "grad_norm": 3.345093011856079, "learning_rate": 7.949544246103782e-06, "loss": 0.6476, "step": 6058 }, { "epoch": 1.671448275862069, "grad_norm": 4.089847087860107, "learning_rate": 7.948807543121675e-06, "loss": 0.8891, "step": 6059 }, { "epoch": 1.6717241379310344, "grad_norm": 4.330294132232666, "learning_rate": 7.948070741969122e-06, "loss": 0.9178, "step": 6060 }, { "epoch": 1.6720000000000002, "grad_norm": 3.973173141479492, "learning_rate": 7.947333842670653e-06, "loss": 0.6916, "step": 6061 }, { "epoch": 1.6722758620689655, "grad_norm": 3.9539942741394043, "learning_rate": 7.946596845250799e-06, "loss": 0.7985, "step": 6062 }, { "epoch": 1.672551724137931, "grad_norm": 4.448697090148926, "learning_rate": 7.945859749734094e-06, "loss": 0.9359, "step": 6063 }, { "epoch": 1.6728275862068966, "grad_norm": 3.892214298248291, "learning_rate": 7.94512255614508e-06, "loss": 0.9255, "step": 6064 }, { "epoch": 1.673103448275862, "grad_norm": 3.5657730102539062, "learning_rate": 7.9443852645083e-06, "loss": 0.7294, "step": 6065 }, { "epoch": 1.6733793103448276, "grad_norm": 3.848795175552368, "learning_rate": 7.943647874848298e-06, "loss": 0.8066, "step": 6066 }, { "epoch": 1.6736551724137931, "grad_norm": 3.957848310470581, "learning_rate": 7.942910387189621e-06, "loss": 0.7893, "step": 6067 }, { "epoch": 1.6739310344827585, "grad_norm": 3.681293487548828, "learning_rate": 7.942172801556824e-06, "loss": 0.8165, "step": 6068 }, { "epoch": 1.6742068965517243, "grad_norm": 4.062282085418701, "learning_rate": 7.941435117974463e-06, "loss": 0.8906, "step": 6069 }, { "epoch": 1.6744827586206896, "grad_norm": 3.7179112434387207, "learning_rate": 7.940697336467094e-06, "loss": 0.9293, "step": 6070 }, { "epoch": 1.6747586206896552, "grad_norm": 3.9488272666931152, "learning_rate": 7.939959457059281e-06, "loss": 0.7977, "step": 6071 }, { "epoch": 1.6750344827586208, "grad_norm": 3.8480329513549805, "learning_rate": 7.939221479775588e-06, "loss": 0.714, "step": 6072 }, { "epoch": 1.6753103448275861, "grad_norm": 4.095948219299316, "learning_rate": 7.938483404640582e-06, "loss": 0.8159, "step": 6073 }, { "epoch": 1.6755862068965517, "grad_norm": 4.087039947509766, "learning_rate": 7.937745231678837e-06, "loss": 0.877, "step": 6074 }, { "epoch": 1.6758620689655173, "grad_norm": 3.7229223251342773, "learning_rate": 7.93700696091493e-06, "loss": 0.7942, "step": 6075 }, { "epoch": 1.6761379310344826, "grad_norm": 4.341196537017822, "learning_rate": 7.936268592373433e-06, "loss": 0.7791, "step": 6076 }, { "epoch": 1.6764137931034484, "grad_norm": 4.163792133331299, "learning_rate": 7.935530126078932e-06, "loss": 0.8418, "step": 6077 }, { "epoch": 1.6766896551724138, "grad_norm": 3.8073389530181885, "learning_rate": 7.934791562056013e-06, "loss": 0.7776, "step": 6078 }, { "epoch": 1.6769655172413793, "grad_norm": 4.162001609802246, "learning_rate": 7.93405290032926e-06, "loss": 0.8624, "step": 6079 }, { "epoch": 1.677241379310345, "grad_norm": 3.786255359649658, "learning_rate": 7.933314140923264e-06, "loss": 0.6776, "step": 6080 }, { "epoch": 1.6775172413793102, "grad_norm": 3.992746591567993, "learning_rate": 7.932575283862624e-06, "loss": 0.8362, "step": 6081 }, { "epoch": 1.6777931034482758, "grad_norm": 3.8805787563323975, "learning_rate": 7.931836329171934e-06, "loss": 0.8815, "step": 6082 }, { "epoch": 1.6780689655172414, "grad_norm": 4.408298015594482, "learning_rate": 7.931097276875797e-06, "loss": 0.8307, "step": 6083 }, { "epoch": 1.6783448275862067, "grad_norm": 4.230959892272949, "learning_rate": 7.930358126998814e-06, "loss": 0.9311, "step": 6084 }, { "epoch": 1.6786206896551725, "grad_norm": 4.4677839279174805, "learning_rate": 7.929618879565594e-06, "loss": 0.8672, "step": 6085 }, { "epoch": 1.6788965517241379, "grad_norm": 4.18815279006958, "learning_rate": 7.92887953460075e-06, "loss": 0.8268, "step": 6086 }, { "epoch": 1.6791724137931034, "grad_norm": 3.5998029708862305, "learning_rate": 7.928140092128896e-06, "loss": 0.8722, "step": 6087 }, { "epoch": 1.679448275862069, "grad_norm": 3.6419677734375, "learning_rate": 7.927400552174647e-06, "loss": 0.7349, "step": 6088 }, { "epoch": 1.6797241379310344, "grad_norm": 3.7988991737365723, "learning_rate": 7.926660914762622e-06, "loss": 0.824, "step": 6089 }, { "epoch": 1.6800000000000002, "grad_norm": 4.076469898223877, "learning_rate": 7.925921179917449e-06, "loss": 0.83, "step": 6090 }, { "epoch": 1.6802758620689655, "grad_norm": 3.9708921909332275, "learning_rate": 7.925181347663751e-06, "loss": 0.8951, "step": 6091 }, { "epoch": 1.680551724137931, "grad_norm": 4.192507743835449, "learning_rate": 7.924441418026159e-06, "loss": 0.8866, "step": 6092 }, { "epoch": 1.6808275862068967, "grad_norm": 3.6208226680755615, "learning_rate": 7.923701391029309e-06, "loss": 0.7058, "step": 6093 }, { "epoch": 1.681103448275862, "grad_norm": 3.9477779865264893, "learning_rate": 7.922961266697836e-06, "loss": 0.8069, "step": 6094 }, { "epoch": 1.6813793103448276, "grad_norm": 4.09422492980957, "learning_rate": 7.922221045056379e-06, "loss": 0.8125, "step": 6095 }, { "epoch": 1.6816551724137931, "grad_norm": 3.926370620727539, "learning_rate": 7.921480726129583e-06, "loss": 0.8423, "step": 6096 }, { "epoch": 1.6819310344827585, "grad_norm": 4.101324558258057, "learning_rate": 7.92074030994209e-06, "loss": 0.8111, "step": 6097 }, { "epoch": 1.6822068965517243, "grad_norm": 3.6860852241516113, "learning_rate": 7.919999796518556e-06, "loss": 0.7352, "step": 6098 }, { "epoch": 1.6824827586206896, "grad_norm": 3.8741211891174316, "learning_rate": 7.91925918588363e-06, "loss": 0.7615, "step": 6099 }, { "epoch": 1.6827586206896552, "grad_norm": 3.784114122390747, "learning_rate": 7.91851847806197e-06, "loss": 0.7739, "step": 6100 }, { "epoch": 1.6830344827586208, "grad_norm": 3.9786105155944824, "learning_rate": 7.917777673078232e-06, "loss": 0.7028, "step": 6101 }, { "epoch": 1.6833103448275861, "grad_norm": 4.014135360717773, "learning_rate": 7.917036770957082e-06, "loss": 0.8375, "step": 6102 }, { "epoch": 1.6835862068965517, "grad_norm": 4.109893321990967, "learning_rate": 7.916295771723184e-06, "loss": 0.8581, "step": 6103 }, { "epoch": 1.6838620689655173, "grad_norm": 4.225441932678223, "learning_rate": 7.915554675401209e-06, "loss": 0.9812, "step": 6104 }, { "epoch": 1.6841379310344826, "grad_norm": 3.684478282928467, "learning_rate": 7.914813482015826e-06, "loss": 0.8162, "step": 6105 }, { "epoch": 1.6844137931034484, "grad_norm": 3.772847890853882, "learning_rate": 7.914072191591715e-06, "loss": 0.7629, "step": 6106 }, { "epoch": 1.6846896551724138, "grad_norm": 4.1413445472717285, "learning_rate": 7.913330804153549e-06, "loss": 0.8402, "step": 6107 }, { "epoch": 1.6849655172413793, "grad_norm": 3.9021711349487305, "learning_rate": 7.912589319726016e-06, "loss": 0.698, "step": 6108 }, { "epoch": 1.685241379310345, "grad_norm": 4.131752967834473, "learning_rate": 7.911847738333798e-06, "loss": 0.9736, "step": 6109 }, { "epoch": 1.6855172413793102, "grad_norm": 3.8969826698303223, "learning_rate": 7.91110606000158e-06, "loss": 0.9502, "step": 6110 }, { "epoch": 1.6857931034482758, "grad_norm": 3.923407554626465, "learning_rate": 7.910364284754062e-06, "loss": 0.7868, "step": 6111 }, { "epoch": 1.6860689655172414, "grad_norm": 3.880812644958496, "learning_rate": 7.90962241261593e-06, "loss": 0.8219, "step": 6112 }, { "epoch": 1.6863448275862067, "grad_norm": 4.438328266143799, "learning_rate": 7.90888044361189e-06, "loss": 0.8953, "step": 6113 }, { "epoch": 1.6866206896551725, "grad_norm": 4.093905448913574, "learning_rate": 7.908138377766637e-06, "loss": 0.8487, "step": 6114 }, { "epoch": 1.6868965517241379, "grad_norm": 3.944387912750244, "learning_rate": 7.907396215104878e-06, "loss": 0.7121, "step": 6115 }, { "epoch": 1.6871724137931035, "grad_norm": 4.210451126098633, "learning_rate": 7.906653955651323e-06, "loss": 0.8268, "step": 6116 }, { "epoch": 1.687448275862069, "grad_norm": 3.888202667236328, "learning_rate": 7.90591159943068e-06, "loss": 0.7938, "step": 6117 }, { "epoch": 1.6877241379310344, "grad_norm": 4.258852958679199, "learning_rate": 7.905169146467662e-06, "loss": 0.9817, "step": 6118 }, { "epoch": 1.688, "grad_norm": 3.916748285293579, "learning_rate": 7.904426596786991e-06, "loss": 0.9373, "step": 6119 }, { "epoch": 1.6882758620689655, "grad_norm": 4.224079608917236, "learning_rate": 7.903683950413383e-06, "loss": 0.8965, "step": 6120 }, { "epoch": 1.688551724137931, "grad_norm": 3.6551668643951416, "learning_rate": 7.902941207371566e-06, "loss": 0.7939, "step": 6121 }, { "epoch": 1.6888275862068967, "grad_norm": 4.123233795166016, "learning_rate": 7.902198367686265e-06, "loss": 0.7811, "step": 6122 }, { "epoch": 1.689103448275862, "grad_norm": 3.8013713359832764, "learning_rate": 7.90145543138221e-06, "loss": 0.7967, "step": 6123 }, { "epoch": 1.6893793103448276, "grad_norm": 4.333359241485596, "learning_rate": 7.900712398484134e-06, "loss": 0.7956, "step": 6124 }, { "epoch": 1.6896551724137931, "grad_norm": 3.542379856109619, "learning_rate": 7.899969269016775e-06, "loss": 0.7729, "step": 6125 }, { "epoch": 1.6899310344827585, "grad_norm": 3.708469867706299, "learning_rate": 7.899226043004872e-06, "loss": 0.7732, "step": 6126 }, { "epoch": 1.6902068965517243, "grad_norm": 3.9145538806915283, "learning_rate": 7.898482720473172e-06, "loss": 0.7657, "step": 6127 }, { "epoch": 1.6904827586206896, "grad_norm": 4.0594587326049805, "learning_rate": 7.897739301446416e-06, "loss": 0.7011, "step": 6128 }, { "epoch": 1.6907586206896552, "grad_norm": 4.258310317993164, "learning_rate": 7.896995785949355e-06, "loss": 0.9199, "step": 6129 }, { "epoch": 1.6910344827586208, "grad_norm": 4.291155815124512, "learning_rate": 7.896252174006742e-06, "loss": 0.7764, "step": 6130 }, { "epoch": 1.6913103448275861, "grad_norm": 3.6259379386901855, "learning_rate": 7.895508465643335e-06, "loss": 0.7592, "step": 6131 }, { "epoch": 1.6915862068965517, "grad_norm": 3.758690595626831, "learning_rate": 7.894764660883892e-06, "loss": 0.7355, "step": 6132 }, { "epoch": 1.6918620689655173, "grad_norm": 3.590362310409546, "learning_rate": 7.894020759753176e-06, "loss": 0.7833, "step": 6133 }, { "epoch": 1.6921379310344826, "grad_norm": 4.242424964904785, "learning_rate": 7.89327676227595e-06, "loss": 0.7913, "step": 6134 }, { "epoch": 1.6924137931034484, "grad_norm": 3.77292799949646, "learning_rate": 7.892532668476984e-06, "loss": 0.8908, "step": 6135 }, { "epoch": 1.6926896551724138, "grad_norm": 3.7537577152252197, "learning_rate": 7.891788478381052e-06, "loss": 0.8062, "step": 6136 }, { "epoch": 1.6929655172413793, "grad_norm": 4.130527496337891, "learning_rate": 7.891044192012928e-06, "loss": 0.7992, "step": 6137 }, { "epoch": 1.693241379310345, "grad_norm": 4.10560941696167, "learning_rate": 7.89029980939739e-06, "loss": 0.8412, "step": 6138 }, { "epoch": 1.6935172413793103, "grad_norm": 4.266746520996094, "learning_rate": 7.88955533055922e-06, "loss": 0.8983, "step": 6139 }, { "epoch": 1.6937931034482758, "grad_norm": 4.061672687530518, "learning_rate": 7.888810755523205e-06, "loss": 0.8441, "step": 6140 }, { "epoch": 1.6940689655172414, "grad_norm": 3.9225540161132812, "learning_rate": 7.888066084314129e-06, "loss": 0.9465, "step": 6141 }, { "epoch": 1.6943448275862067, "grad_norm": 3.899179458618164, "learning_rate": 7.887321316956787e-06, "loss": 0.7311, "step": 6142 }, { "epoch": 1.6946206896551725, "grad_norm": 3.203784942626953, "learning_rate": 7.886576453475969e-06, "loss": 0.6832, "step": 6143 }, { "epoch": 1.694896551724138, "grad_norm": 3.9874107837677, "learning_rate": 7.885831493896478e-06, "loss": 0.7363, "step": 6144 }, { "epoch": 1.6951724137931035, "grad_norm": 4.006309986114502, "learning_rate": 7.885086438243112e-06, "loss": 0.7695, "step": 6145 }, { "epoch": 1.695448275862069, "grad_norm": 4.324071884155273, "learning_rate": 7.884341286540676e-06, "loss": 0.7613, "step": 6146 }, { "epoch": 1.6957241379310344, "grad_norm": 3.8344104290008545, "learning_rate": 7.883596038813975e-06, "loss": 0.8687, "step": 6147 }, { "epoch": 1.696, "grad_norm": 3.775869369506836, "learning_rate": 7.882850695087822e-06, "loss": 0.9251, "step": 6148 }, { "epoch": 1.6962758620689655, "grad_norm": 4.224763870239258, "learning_rate": 7.882105255387031e-06, "loss": 0.7625, "step": 6149 }, { "epoch": 1.6965517241379309, "grad_norm": 4.218647003173828, "learning_rate": 7.881359719736416e-06, "loss": 0.7801, "step": 6150 }, { "epoch": 1.6968275862068967, "grad_norm": 3.771059989929199, "learning_rate": 7.880614088160799e-06, "loss": 0.7027, "step": 6151 }, { "epoch": 1.697103448275862, "grad_norm": 4.159749507904053, "learning_rate": 7.879868360685004e-06, "loss": 0.7517, "step": 6152 }, { "epoch": 1.6973793103448276, "grad_norm": 4.10264778137207, "learning_rate": 7.879122537333857e-06, "loss": 0.8676, "step": 6153 }, { "epoch": 1.6976551724137932, "grad_norm": 4.02526330947876, "learning_rate": 7.878376618132187e-06, "loss": 0.7806, "step": 6154 }, { "epoch": 1.6979310344827585, "grad_norm": 3.813760757446289, "learning_rate": 7.877630603104824e-06, "loss": 0.7676, "step": 6155 }, { "epoch": 1.6982068965517243, "grad_norm": 3.8853399753570557, "learning_rate": 7.876884492276611e-06, "loss": 0.786, "step": 6156 }, { "epoch": 1.6984827586206896, "grad_norm": 4.338202476501465, "learning_rate": 7.87613828567238e-06, "loss": 0.8687, "step": 6157 }, { "epoch": 1.6987586206896552, "grad_norm": 3.892595052719116, "learning_rate": 7.875391983316979e-06, "loss": 0.7742, "step": 6158 }, { "epoch": 1.6990344827586208, "grad_norm": 3.6585280895233154, "learning_rate": 7.87464558523525e-06, "loss": 0.7329, "step": 6159 }, { "epoch": 1.6993103448275861, "grad_norm": 3.716312885284424, "learning_rate": 7.873899091452041e-06, "loss": 0.7461, "step": 6160 }, { "epoch": 1.6995862068965517, "grad_norm": 4.304987907409668, "learning_rate": 7.873152501992209e-06, "loss": 0.8872, "step": 6161 }, { "epoch": 1.6998620689655173, "grad_norm": 4.114180564880371, "learning_rate": 7.872405816880605e-06, "loss": 0.8485, "step": 6162 }, { "epoch": 1.7001379310344826, "grad_norm": 3.6646549701690674, "learning_rate": 7.871659036142087e-06, "loss": 0.8264, "step": 6163 }, { "epoch": 1.7004137931034484, "grad_norm": 3.6120665073394775, "learning_rate": 7.870912159801521e-06, "loss": 0.6608, "step": 6164 }, { "epoch": 1.7006896551724138, "grad_norm": 4.2018818855285645, "learning_rate": 7.870165187883765e-06, "loss": 0.7297, "step": 6165 }, { "epoch": 1.7009655172413793, "grad_norm": 3.9713151454925537, "learning_rate": 7.869418120413693e-06, "loss": 0.9404, "step": 6166 }, { "epoch": 1.701241379310345, "grad_norm": 3.7829298973083496, "learning_rate": 7.868670957416171e-06, "loss": 0.7206, "step": 6167 }, { "epoch": 1.7015172413793103, "grad_norm": 3.654722213745117, "learning_rate": 7.867923698916078e-06, "loss": 0.7736, "step": 6168 }, { "epoch": 1.7017931034482758, "grad_norm": 4.261320114135742, "learning_rate": 7.867176344938288e-06, "loss": 0.8671, "step": 6169 }, { "epoch": 1.7020689655172414, "grad_norm": 3.89906644821167, "learning_rate": 7.866428895507684e-06, "loss": 0.763, "step": 6170 }, { "epoch": 1.7023448275862068, "grad_norm": 3.925981283187866, "learning_rate": 7.865681350649146e-06, "loss": 0.8423, "step": 6171 }, { "epoch": 1.7026206896551725, "grad_norm": 4.129311561584473, "learning_rate": 7.864933710387563e-06, "loss": 0.8193, "step": 6172 }, { "epoch": 1.702896551724138, "grad_norm": 4.031579494476318, "learning_rate": 7.864185974747829e-06, "loss": 0.772, "step": 6173 }, { "epoch": 1.7031724137931035, "grad_norm": 3.8022050857543945, "learning_rate": 7.86343814375483e-06, "loss": 0.7446, "step": 6174 }, { "epoch": 1.703448275862069, "grad_norm": 4.2069220542907715, "learning_rate": 7.862690217433469e-06, "loss": 0.7635, "step": 6175 }, { "epoch": 1.7037241379310344, "grad_norm": 3.55633282661438, "learning_rate": 7.861942195808644e-06, "loss": 0.7143, "step": 6176 }, { "epoch": 1.704, "grad_norm": 4.134818077087402, "learning_rate": 7.861194078905255e-06, "loss": 0.8937, "step": 6177 }, { "epoch": 1.7042758620689655, "grad_norm": 4.190010070800781, "learning_rate": 7.860445866748208e-06, "loss": 0.9141, "step": 6178 }, { "epoch": 1.7045517241379309, "grad_norm": 3.9993202686309814, "learning_rate": 7.859697559362415e-06, "loss": 0.8952, "step": 6179 }, { "epoch": 1.7048275862068967, "grad_norm": 3.7203710079193115, "learning_rate": 7.858949156772789e-06, "loss": 0.6708, "step": 6180 }, { "epoch": 1.705103448275862, "grad_norm": 3.508077383041382, "learning_rate": 7.858200659004242e-06, "loss": 0.6802, "step": 6181 }, { "epoch": 1.7053793103448276, "grad_norm": 4.074796676635742, "learning_rate": 7.857452066081695e-06, "loss": 0.8267, "step": 6182 }, { "epoch": 1.7056551724137932, "grad_norm": 3.968419075012207, "learning_rate": 7.85670337803007e-06, "loss": 0.715, "step": 6183 }, { "epoch": 1.7059310344827585, "grad_norm": 4.104053974151611, "learning_rate": 7.855954594874288e-06, "loss": 0.874, "step": 6184 }, { "epoch": 1.706206896551724, "grad_norm": 4.125548362731934, "learning_rate": 7.855205716639285e-06, "loss": 0.8705, "step": 6185 }, { "epoch": 1.7064827586206897, "grad_norm": 3.6722207069396973, "learning_rate": 7.854456743349986e-06, "loss": 0.7852, "step": 6186 }, { "epoch": 1.7067586206896552, "grad_norm": 3.69840931892395, "learning_rate": 7.853707675031327e-06, "loss": 0.7639, "step": 6187 }, { "epoch": 1.7070344827586208, "grad_norm": 4.027606010437012, "learning_rate": 7.852958511708244e-06, "loss": 0.9066, "step": 6188 }, { "epoch": 1.7073103448275861, "grad_norm": 3.947624683380127, "learning_rate": 7.852209253405682e-06, "loss": 0.8188, "step": 6189 }, { "epoch": 1.7075862068965517, "grad_norm": 4.157985210418701, "learning_rate": 7.851459900148581e-06, "loss": 0.9143, "step": 6190 }, { "epoch": 1.7078620689655173, "grad_norm": 3.6035172939300537, "learning_rate": 7.850710451961891e-06, "loss": 0.7569, "step": 6191 }, { "epoch": 1.7081379310344826, "grad_norm": 4.177570343017578, "learning_rate": 7.849960908870562e-06, "loss": 0.9914, "step": 6192 }, { "epoch": 1.7084137931034484, "grad_norm": 4.140831470489502, "learning_rate": 7.849211270899545e-06, "loss": 0.8948, "step": 6193 }, { "epoch": 1.7086896551724138, "grad_norm": 3.8703627586364746, "learning_rate": 7.8484615380738e-06, "loss": 0.8279, "step": 6194 }, { "epoch": 1.7089655172413794, "grad_norm": 3.8362631797790527, "learning_rate": 7.847711710418284e-06, "loss": 0.8499, "step": 6195 }, { "epoch": 1.709241379310345, "grad_norm": 4.270476341247559, "learning_rate": 7.84696178795796e-06, "loss": 0.8958, "step": 6196 }, { "epoch": 1.7095172413793103, "grad_norm": 3.8807787895202637, "learning_rate": 7.846211770717797e-06, "loss": 0.7555, "step": 6197 }, { "epoch": 1.7097931034482758, "grad_norm": 3.80069899559021, "learning_rate": 7.845461658722762e-06, "loss": 0.8216, "step": 6198 }, { "epoch": 1.7100689655172414, "grad_norm": 3.9132630825042725, "learning_rate": 7.844711451997827e-06, "loss": 0.8229, "step": 6199 }, { "epoch": 1.7103448275862068, "grad_norm": 3.8329501152038574, "learning_rate": 7.84396115056797e-06, "loss": 0.8075, "step": 6200 }, { "epoch": 1.7106206896551726, "grad_norm": 4.150167465209961, "learning_rate": 7.843210754458166e-06, "loss": 0.7322, "step": 6201 }, { "epoch": 1.710896551724138, "grad_norm": 4.256906986236572, "learning_rate": 7.842460263693402e-06, "loss": 0.8932, "step": 6202 }, { "epoch": 1.7111724137931035, "grad_norm": 3.869475841522217, "learning_rate": 7.841709678298656e-06, "loss": 0.7131, "step": 6203 }, { "epoch": 1.711448275862069, "grad_norm": 4.184509754180908, "learning_rate": 7.840958998298922e-06, "loss": 0.9021, "step": 6204 }, { "epoch": 1.7117241379310344, "grad_norm": 3.7866921424865723, "learning_rate": 7.84020822371919e-06, "loss": 0.8164, "step": 6205 }, { "epoch": 1.712, "grad_norm": 4.292204856872559, "learning_rate": 7.839457354584452e-06, "loss": 0.8565, "step": 6206 }, { "epoch": 1.7122758620689655, "grad_norm": 4.1715521812438965, "learning_rate": 7.838706390919708e-06, "loss": 0.9064, "step": 6207 }, { "epoch": 1.7125517241379309, "grad_norm": 4.079244136810303, "learning_rate": 7.83795533274996e-06, "loss": 0.9325, "step": 6208 }, { "epoch": 1.7128275862068967, "grad_norm": 3.8713266849517822, "learning_rate": 7.837204180100209e-06, "loss": 0.7567, "step": 6209 }, { "epoch": 1.713103448275862, "grad_norm": 3.563680410385132, "learning_rate": 7.836452932995464e-06, "loss": 0.6906, "step": 6210 }, { "epoch": 1.7133793103448276, "grad_norm": 4.332118034362793, "learning_rate": 7.835701591460733e-06, "loss": 0.796, "step": 6211 }, { "epoch": 1.7136551724137932, "grad_norm": 4.363499641418457, "learning_rate": 7.834950155521033e-06, "loss": 0.8004, "step": 6212 }, { "epoch": 1.7139310344827585, "grad_norm": 4.391286849975586, "learning_rate": 7.834198625201376e-06, "loss": 0.9631, "step": 6213 }, { "epoch": 1.714206896551724, "grad_norm": 4.066075325012207, "learning_rate": 7.833447000526786e-06, "loss": 0.8078, "step": 6214 }, { "epoch": 1.7144827586206897, "grad_norm": 4.056137561798096, "learning_rate": 7.832695281522281e-06, "loss": 0.8346, "step": 6215 }, { "epoch": 1.714758620689655, "grad_norm": 3.7459146976470947, "learning_rate": 7.83194346821289e-06, "loss": 0.8234, "step": 6216 }, { "epoch": 1.7150344827586208, "grad_norm": 3.833110809326172, "learning_rate": 7.831191560623643e-06, "loss": 0.815, "step": 6217 }, { "epoch": 1.7153103448275862, "grad_norm": 3.4922330379486084, "learning_rate": 7.830439558779572e-06, "loss": 0.7516, "step": 6218 }, { "epoch": 1.7155862068965517, "grad_norm": 3.978654384613037, "learning_rate": 7.829687462705708e-06, "loss": 0.8821, "step": 6219 }, { "epoch": 1.7158620689655173, "grad_norm": 4.470913887023926, "learning_rate": 7.828935272427097e-06, "loss": 0.8463, "step": 6220 }, { "epoch": 1.7161379310344826, "grad_norm": 3.7624852657318115, "learning_rate": 7.828182987968772e-06, "loss": 0.6682, "step": 6221 }, { "epoch": 1.7164137931034484, "grad_norm": 4.0233001708984375, "learning_rate": 7.827430609355783e-06, "loss": 0.8121, "step": 6222 }, { "epoch": 1.7166896551724138, "grad_norm": 4.113835334777832, "learning_rate": 7.826678136613178e-06, "loss": 0.7962, "step": 6223 }, { "epoch": 1.7169655172413794, "grad_norm": 3.7216298580169678, "learning_rate": 7.825925569766005e-06, "loss": 0.8216, "step": 6224 }, { "epoch": 1.717241379310345, "grad_norm": 4.1434454917907715, "learning_rate": 7.825172908839321e-06, "loss": 0.9171, "step": 6225 }, { "epoch": 1.7175172413793103, "grad_norm": 3.4636645317077637, "learning_rate": 7.824420153858182e-06, "loss": 0.79, "step": 6226 }, { "epoch": 1.7177931034482758, "grad_norm": 3.703357219696045, "learning_rate": 7.823667304847648e-06, "loss": 0.7003, "step": 6227 }, { "epoch": 1.7180689655172414, "grad_norm": 3.9776570796966553, "learning_rate": 7.822914361832786e-06, "loss": 0.8012, "step": 6228 }, { "epoch": 1.7183448275862068, "grad_norm": 4.031332015991211, "learning_rate": 7.822161324838658e-06, "loss": 0.8326, "step": 6229 }, { "epoch": 1.7186206896551726, "grad_norm": 3.590416193008423, "learning_rate": 7.821408193890334e-06, "loss": 0.7457, "step": 6230 }, { "epoch": 1.718896551724138, "grad_norm": 4.079172134399414, "learning_rate": 7.82065496901289e-06, "loss": 0.8401, "step": 6231 }, { "epoch": 1.7191724137931035, "grad_norm": 3.713651180267334, "learning_rate": 7.8199016502314e-06, "loss": 0.8633, "step": 6232 }, { "epoch": 1.719448275862069, "grad_norm": 4.199697017669678, "learning_rate": 7.819148237570943e-06, "loss": 0.7265, "step": 6233 }, { "epoch": 1.7197241379310344, "grad_norm": 3.9333291053771973, "learning_rate": 7.818394731056603e-06, "loss": 0.8104, "step": 6234 }, { "epoch": 1.72, "grad_norm": 3.7131729125976562, "learning_rate": 7.817641130713465e-06, "loss": 0.788, "step": 6235 }, { "epoch": 1.7202758620689655, "grad_norm": 4.783974647521973, "learning_rate": 7.816887436566616e-06, "loss": 0.7748, "step": 6236 }, { "epoch": 1.720551724137931, "grad_norm": 4.353356838226318, "learning_rate": 7.81613364864115e-06, "loss": 0.8015, "step": 6237 }, { "epoch": 1.7208275862068967, "grad_norm": 3.914759397506714, "learning_rate": 7.81537976696216e-06, "loss": 0.7559, "step": 6238 }, { "epoch": 1.721103448275862, "grad_norm": 4.1989240646362305, "learning_rate": 7.814625791554744e-06, "loss": 0.9154, "step": 6239 }, { "epoch": 1.7213793103448276, "grad_norm": 4.119356632232666, "learning_rate": 7.813871722444002e-06, "loss": 0.8795, "step": 6240 }, { "epoch": 1.7216551724137932, "grad_norm": 4.140307903289795, "learning_rate": 7.81311755965504e-06, "loss": 0.8026, "step": 6241 }, { "epoch": 1.7219310344827585, "grad_norm": 3.8461058139801025, "learning_rate": 7.812363303212966e-06, "loss": 0.7336, "step": 6242 }, { "epoch": 1.722206896551724, "grad_norm": 3.7764968872070312, "learning_rate": 7.811608953142889e-06, "loss": 0.7547, "step": 6243 }, { "epoch": 1.7224827586206897, "grad_norm": 3.7104029655456543, "learning_rate": 7.81085450946992e-06, "loss": 0.7823, "step": 6244 }, { "epoch": 1.722758620689655, "grad_norm": 4.008437633514404, "learning_rate": 7.810099972219181e-06, "loss": 0.8784, "step": 6245 }, { "epoch": 1.7230344827586208, "grad_norm": 3.719978094100952, "learning_rate": 7.809345341415789e-06, "loss": 0.7223, "step": 6246 }, { "epoch": 1.7233103448275862, "grad_norm": 4.199063777923584, "learning_rate": 7.808590617084867e-06, "loss": 0.8713, "step": 6247 }, { "epoch": 1.7235862068965517, "grad_norm": 3.718893051147461, "learning_rate": 7.80783579925154e-06, "loss": 0.8567, "step": 6248 }, { "epoch": 1.7238620689655173, "grad_norm": 3.9495344161987305, "learning_rate": 7.807080887940938e-06, "loss": 0.7901, "step": 6249 }, { "epoch": 1.7241379310344827, "grad_norm": 4.168396949768066, "learning_rate": 7.806325883178191e-06, "loss": 0.8789, "step": 6250 }, { "epoch": 1.7244137931034482, "grad_norm": 3.8769986629486084, "learning_rate": 7.80557078498844e-06, "loss": 0.7249, "step": 6251 }, { "epoch": 1.7246896551724138, "grad_norm": 4.02840518951416, "learning_rate": 7.804815593396818e-06, "loss": 0.98, "step": 6252 }, { "epoch": 1.7249655172413794, "grad_norm": 4.01158332824707, "learning_rate": 7.804060308428468e-06, "loss": 0.8407, "step": 6253 }, { "epoch": 1.725241379310345, "grad_norm": 4.066356658935547, "learning_rate": 7.803304930108536e-06, "loss": 0.7115, "step": 6254 }, { "epoch": 1.7255172413793103, "grad_norm": 3.901421546936035, "learning_rate": 7.802549458462167e-06, "loss": 0.8242, "step": 6255 }, { "epoch": 1.7257931034482759, "grad_norm": 4.016209602355957, "learning_rate": 7.801793893514515e-06, "loss": 0.8592, "step": 6256 }, { "epoch": 1.7260689655172414, "grad_norm": 3.9211032390594482, "learning_rate": 7.801038235290732e-06, "loss": 0.8892, "step": 6257 }, { "epoch": 1.7263448275862068, "grad_norm": 3.99102520942688, "learning_rate": 7.800282483815978e-06, "loss": 0.8122, "step": 6258 }, { "epoch": 1.7266206896551726, "grad_norm": 4.129476070404053, "learning_rate": 7.799526639115407e-06, "loss": 0.8155, "step": 6259 }, { "epoch": 1.726896551724138, "grad_norm": 4.181197166442871, "learning_rate": 7.798770701214186e-06, "loss": 0.8565, "step": 6260 }, { "epoch": 1.7271724137931035, "grad_norm": 3.741992235183716, "learning_rate": 7.798014670137483e-06, "loss": 0.766, "step": 6261 }, { "epoch": 1.727448275862069, "grad_norm": 4.193390846252441, "learning_rate": 7.797258545910465e-06, "loss": 0.8882, "step": 6262 }, { "epoch": 1.7277241379310344, "grad_norm": 3.7459685802459717, "learning_rate": 7.796502328558305e-06, "loss": 0.7824, "step": 6263 }, { "epoch": 1.728, "grad_norm": 3.6832525730133057, "learning_rate": 7.795746018106181e-06, "loss": 0.7875, "step": 6264 }, { "epoch": 1.7282758620689656, "grad_norm": 4.121317386627197, "learning_rate": 7.794989614579267e-06, "loss": 0.721, "step": 6265 }, { "epoch": 1.728551724137931, "grad_norm": 3.82468843460083, "learning_rate": 7.79423311800275e-06, "loss": 0.8965, "step": 6266 }, { "epoch": 1.7288275862068967, "grad_norm": 4.224127769470215, "learning_rate": 7.793476528401809e-06, "loss": 0.8521, "step": 6267 }, { "epoch": 1.729103448275862, "grad_norm": 3.846386194229126, "learning_rate": 7.79271984580164e-06, "loss": 0.7587, "step": 6268 }, { "epoch": 1.7293793103448276, "grad_norm": 4.157626628875732, "learning_rate": 7.791963070227427e-06, "loss": 0.9172, "step": 6269 }, { "epoch": 1.7296551724137932, "grad_norm": 3.8319637775421143, "learning_rate": 7.791206201704366e-06, "loss": 0.7411, "step": 6270 }, { "epoch": 1.7299310344827585, "grad_norm": 3.886713981628418, "learning_rate": 7.790449240257658e-06, "loss": 0.7688, "step": 6271 }, { "epoch": 1.730206896551724, "grad_norm": 4.253810882568359, "learning_rate": 7.789692185912501e-06, "loss": 0.8749, "step": 6272 }, { "epoch": 1.7304827586206897, "grad_norm": 4.2748494148254395, "learning_rate": 7.788935038694096e-06, "loss": 0.9018, "step": 6273 }, { "epoch": 1.730758620689655, "grad_norm": 3.988415241241455, "learning_rate": 7.788177798627653e-06, "loss": 0.8117, "step": 6274 }, { "epoch": 1.7310344827586208, "grad_norm": 4.0720624923706055, "learning_rate": 7.787420465738382e-06, "loss": 0.8325, "step": 6275 }, { "epoch": 1.7313103448275862, "grad_norm": 3.6404836177825928, "learning_rate": 7.786663040051494e-06, "loss": 0.7837, "step": 6276 }, { "epoch": 1.7315862068965517, "grad_norm": 3.8356311321258545, "learning_rate": 7.785905521592203e-06, "loss": 0.7061, "step": 6277 }, { "epoch": 1.7318620689655173, "grad_norm": 3.89693546295166, "learning_rate": 7.785147910385733e-06, "loss": 0.7815, "step": 6278 }, { "epoch": 1.7321379310344827, "grad_norm": 3.9911677837371826, "learning_rate": 7.784390206457302e-06, "loss": 0.9163, "step": 6279 }, { "epoch": 1.7324137931034482, "grad_norm": 3.9036223888397217, "learning_rate": 7.783632409832137e-06, "loss": 0.7849, "step": 6280 }, { "epoch": 1.7326896551724138, "grad_norm": 4.993154048919678, "learning_rate": 7.782874520535467e-06, "loss": 0.7801, "step": 6281 }, { "epoch": 1.7329655172413791, "grad_norm": 3.9440371990203857, "learning_rate": 7.78211653859252e-06, "loss": 0.8046, "step": 6282 }, { "epoch": 1.733241379310345, "grad_norm": 3.6347928047180176, "learning_rate": 7.781358464028535e-06, "loss": 0.8489, "step": 6283 }, { "epoch": 1.7335172413793103, "grad_norm": 3.935593843460083, "learning_rate": 7.780600296868744e-06, "loss": 0.8572, "step": 6284 }, { "epoch": 1.7337931034482759, "grad_norm": 4.27089786529541, "learning_rate": 7.779842037138393e-06, "loss": 0.8648, "step": 6285 }, { "epoch": 1.7340689655172414, "grad_norm": 3.9934909343719482, "learning_rate": 7.779083684862723e-06, "loss": 0.9407, "step": 6286 }, { "epoch": 1.7343448275862068, "grad_norm": 4.029943466186523, "learning_rate": 7.77832524006698e-06, "loss": 0.7895, "step": 6287 }, { "epoch": 1.7346206896551726, "grad_norm": 3.4713070392608643, "learning_rate": 7.777566702776416e-06, "loss": 0.7316, "step": 6288 }, { "epoch": 1.734896551724138, "grad_norm": 4.081430912017822, "learning_rate": 7.776808073016281e-06, "loss": 0.9818, "step": 6289 }, { "epoch": 1.7351724137931035, "grad_norm": 3.9838569164276123, "learning_rate": 7.776049350811836e-06, "loss": 0.9058, "step": 6290 }, { "epoch": 1.735448275862069, "grad_norm": 4.260560512542725, "learning_rate": 7.775290536188335e-06, "loss": 0.8917, "step": 6291 }, { "epoch": 1.7357241379310344, "grad_norm": 4.302438735961914, "learning_rate": 7.774531629171041e-06, "loss": 0.8339, "step": 6292 }, { "epoch": 1.736, "grad_norm": 3.953162670135498, "learning_rate": 7.77377262978522e-06, "loss": 0.7865, "step": 6293 }, { "epoch": 1.7362758620689656, "grad_norm": 4.2295241355896, "learning_rate": 7.773013538056144e-06, "loss": 0.7949, "step": 6294 }, { "epoch": 1.736551724137931, "grad_norm": 3.5732080936431885, "learning_rate": 7.772254354009078e-06, "loss": 0.8175, "step": 6295 }, { "epoch": 1.7368275862068967, "grad_norm": 4.103391647338867, "learning_rate": 7.771495077669299e-06, "loss": 0.7456, "step": 6296 }, { "epoch": 1.737103448275862, "grad_norm": 3.8612847328186035, "learning_rate": 7.770735709062086e-06, "loss": 0.788, "step": 6297 }, { "epoch": 1.7373793103448276, "grad_norm": 4.073736190795898, "learning_rate": 7.769976248212717e-06, "loss": 0.8101, "step": 6298 }, { "epoch": 1.7376551724137932, "grad_norm": 4.332559108734131, "learning_rate": 7.769216695146478e-06, "loss": 0.8151, "step": 6299 }, { "epoch": 1.7379310344827585, "grad_norm": 3.762247323989868, "learning_rate": 7.768457049888654e-06, "loss": 0.763, "step": 6300 }, { "epoch": 1.7382068965517241, "grad_norm": 4.079463958740234, "learning_rate": 7.767697312464535e-06, "loss": 0.7778, "step": 6301 }, { "epoch": 1.7384827586206897, "grad_norm": 3.9952030181884766, "learning_rate": 7.766937482899416e-06, "loss": 0.8525, "step": 6302 }, { "epoch": 1.738758620689655, "grad_norm": 3.7998030185699463, "learning_rate": 7.76617756121859e-06, "loss": 0.7866, "step": 6303 }, { "epoch": 1.7390344827586208, "grad_norm": 3.866666316986084, "learning_rate": 7.765417547447357e-06, "loss": 0.8018, "step": 6304 }, { "epoch": 1.7393103448275862, "grad_norm": 4.100278377532959, "learning_rate": 7.76465744161102e-06, "loss": 0.8172, "step": 6305 }, { "epoch": 1.7395862068965517, "grad_norm": 3.8842344284057617, "learning_rate": 7.763897243734883e-06, "loss": 0.8581, "step": 6306 }, { "epoch": 1.7398620689655173, "grad_norm": 4.179296493530273, "learning_rate": 7.763136953844253e-06, "loss": 0.8584, "step": 6307 }, { "epoch": 1.7401379310344827, "grad_norm": 3.315655469894409, "learning_rate": 7.762376571964446e-06, "loss": 0.8115, "step": 6308 }, { "epoch": 1.7404137931034482, "grad_norm": 3.7745094299316406, "learning_rate": 7.761616098120772e-06, "loss": 0.7262, "step": 6309 }, { "epoch": 1.7406896551724138, "grad_norm": 3.930281639099121, "learning_rate": 7.76085553233855e-06, "loss": 0.8516, "step": 6310 }, { "epoch": 1.7409655172413792, "grad_norm": 3.869851589202881, "learning_rate": 7.760094874643098e-06, "loss": 0.755, "step": 6311 }, { "epoch": 1.741241379310345, "grad_norm": 3.793653964996338, "learning_rate": 7.759334125059742e-06, "loss": 0.7711, "step": 6312 }, { "epoch": 1.7415172413793103, "grad_norm": 4.027730464935303, "learning_rate": 7.758573283613809e-06, "loss": 0.9296, "step": 6313 }, { "epoch": 1.7417931034482759, "grad_norm": 3.9419009685516357, "learning_rate": 7.757812350330626e-06, "loss": 0.78, "step": 6314 }, { "epoch": 1.7420689655172414, "grad_norm": 4.032092094421387, "learning_rate": 7.757051325235529e-06, "loss": 0.8419, "step": 6315 }, { "epoch": 1.7423448275862068, "grad_norm": 4.197104454040527, "learning_rate": 7.756290208353852e-06, "loss": 0.8889, "step": 6316 }, { "epoch": 1.7426206896551724, "grad_norm": 3.7830007076263428, "learning_rate": 7.755528999710933e-06, "loss": 0.7635, "step": 6317 }, { "epoch": 1.742896551724138, "grad_norm": 3.588759422302246, "learning_rate": 7.754767699332115e-06, "loss": 0.6692, "step": 6318 }, { "epoch": 1.7431724137931035, "grad_norm": 3.7769947052001953, "learning_rate": 7.754006307242743e-06, "loss": 0.8809, "step": 6319 }, { "epoch": 1.743448275862069, "grad_norm": 3.5277087688446045, "learning_rate": 7.753244823468163e-06, "loss": 0.7181, "step": 6320 }, { "epoch": 1.7437241379310344, "grad_norm": 4.496232509613037, "learning_rate": 7.752483248033728e-06, "loss": 0.8843, "step": 6321 }, { "epoch": 1.744, "grad_norm": 3.69997501373291, "learning_rate": 7.751721580964792e-06, "loss": 0.8377, "step": 6322 }, { "epoch": 1.7442758620689656, "grad_norm": 3.875598907470703, "learning_rate": 7.750959822286712e-06, "loss": 0.7087, "step": 6323 }, { "epoch": 1.744551724137931, "grad_norm": 3.9835703372955322, "learning_rate": 7.750197972024847e-06, "loss": 0.7954, "step": 6324 }, { "epoch": 1.7448275862068967, "grad_norm": 3.7093231678009033, "learning_rate": 7.74943603020456e-06, "loss": 0.8736, "step": 6325 }, { "epoch": 1.745103448275862, "grad_norm": 3.626188278198242, "learning_rate": 7.748673996851219e-06, "loss": 0.7873, "step": 6326 }, { "epoch": 1.7453793103448276, "grad_norm": 3.7206273078918457, "learning_rate": 7.747911871990193e-06, "loss": 0.7514, "step": 6327 }, { "epoch": 1.7456551724137932, "grad_norm": 3.4692931175231934, "learning_rate": 7.747149655646852e-06, "loss": 0.7895, "step": 6328 }, { "epoch": 1.7459310344827585, "grad_norm": 3.960003614425659, "learning_rate": 7.746387347846576e-06, "loss": 0.7493, "step": 6329 }, { "epoch": 1.7462068965517241, "grad_norm": 4.071225166320801, "learning_rate": 7.745624948614739e-06, "loss": 0.9089, "step": 6330 }, { "epoch": 1.7464827586206897, "grad_norm": 4.064518928527832, "learning_rate": 7.744862457976724e-06, "loss": 0.8285, "step": 6331 }, { "epoch": 1.746758620689655, "grad_norm": 3.9306371212005615, "learning_rate": 7.744099875957916e-06, "loss": 0.8286, "step": 6332 }, { "epoch": 1.7470344827586208, "grad_norm": 3.941859483718872, "learning_rate": 7.743337202583702e-06, "loss": 0.7961, "step": 6333 }, { "epoch": 1.7473103448275862, "grad_norm": 3.4377870559692383, "learning_rate": 7.74257443787947e-06, "loss": 0.7428, "step": 6334 }, { "epoch": 1.7475862068965518, "grad_norm": 3.895733594894409, "learning_rate": 7.74181158187062e-06, "loss": 0.7527, "step": 6335 }, { "epoch": 1.7478620689655173, "grad_norm": 4.396218776702881, "learning_rate": 7.741048634582544e-06, "loss": 0.8558, "step": 6336 }, { "epoch": 1.7481379310344827, "grad_norm": 4.060749053955078, "learning_rate": 7.740285596040644e-06, "loss": 0.7809, "step": 6337 }, { "epoch": 1.7484137931034482, "grad_norm": 4.204327583312988, "learning_rate": 7.739522466270319e-06, "loss": 0.7951, "step": 6338 }, { "epoch": 1.7486896551724138, "grad_norm": 3.8215172290802, "learning_rate": 7.738759245296978e-06, "loss": 0.7654, "step": 6339 }, { "epoch": 1.7489655172413792, "grad_norm": 4.06584358215332, "learning_rate": 7.737995933146029e-06, "loss": 0.8079, "step": 6340 }, { "epoch": 1.749241379310345, "grad_norm": 3.805244207382202, "learning_rate": 7.737232529842887e-06, "loss": 0.7891, "step": 6341 }, { "epoch": 1.7495172413793103, "grad_norm": 4.292098522186279, "learning_rate": 7.736469035412962e-06, "loss": 0.8144, "step": 6342 }, { "epoch": 1.7497931034482759, "grad_norm": 4.417497634887695, "learning_rate": 7.735705449881672e-06, "loss": 0.7806, "step": 6343 }, { "epoch": 1.7500689655172414, "grad_norm": 4.3022308349609375, "learning_rate": 7.734941773274441e-06, "loss": 0.9231, "step": 6344 }, { "epoch": 1.7503448275862068, "grad_norm": 4.200203895568848, "learning_rate": 7.734178005616692e-06, "loss": 0.9382, "step": 6345 }, { "epoch": 1.7506206896551724, "grad_norm": 4.279392242431641, "learning_rate": 7.733414146933853e-06, "loss": 0.807, "step": 6346 }, { "epoch": 1.750896551724138, "grad_norm": 3.8719210624694824, "learning_rate": 7.73265019725135e-06, "loss": 0.7551, "step": 6347 }, { "epoch": 1.7511724137931033, "grad_norm": 3.5485854148864746, "learning_rate": 7.731886156594621e-06, "loss": 0.6126, "step": 6348 }, { "epoch": 1.751448275862069, "grad_norm": 4.17846155166626, "learning_rate": 7.731122024989101e-06, "loss": 0.7671, "step": 6349 }, { "epoch": 1.7517241379310344, "grad_norm": 3.9173734188079834, "learning_rate": 7.730357802460226e-06, "loss": 0.848, "step": 6350 }, { "epoch": 1.752, "grad_norm": 3.7835309505462646, "learning_rate": 7.729593489033443e-06, "loss": 0.8551, "step": 6351 }, { "epoch": 1.7522758620689656, "grad_norm": 4.029319763183594, "learning_rate": 7.728829084734193e-06, "loss": 0.8505, "step": 6352 }, { "epoch": 1.752551724137931, "grad_norm": 4.032461643218994, "learning_rate": 7.728064589587925e-06, "loss": 0.847, "step": 6353 }, { "epoch": 1.7528275862068967, "grad_norm": 4.439662933349609, "learning_rate": 7.727300003620091e-06, "loss": 0.7897, "step": 6354 }, { "epoch": 1.753103448275862, "grad_norm": 3.7806239128112793, "learning_rate": 7.726535326856149e-06, "loss": 0.8372, "step": 6355 }, { "epoch": 1.7533793103448276, "grad_norm": 3.5528571605682373, "learning_rate": 7.725770559321548e-06, "loss": 0.6881, "step": 6356 }, { "epoch": 1.7536551724137932, "grad_norm": 3.911609649658203, "learning_rate": 7.725005701041755e-06, "loss": 0.7457, "step": 6357 }, { "epoch": 1.7539310344827586, "grad_norm": 3.971120834350586, "learning_rate": 7.724240752042231e-06, "loss": 0.8472, "step": 6358 }, { "epoch": 1.7542068965517241, "grad_norm": 3.9628427028656006, "learning_rate": 7.723475712348442e-06, "loss": 0.7276, "step": 6359 }, { "epoch": 1.7544827586206897, "grad_norm": 3.40000057220459, "learning_rate": 7.722710581985859e-06, "loss": 0.6601, "step": 6360 }, { "epoch": 1.754758620689655, "grad_norm": 4.2413787841796875, "learning_rate": 7.721945360979953e-06, "loss": 0.9312, "step": 6361 }, { "epoch": 1.7550344827586208, "grad_norm": 3.4525275230407715, "learning_rate": 7.7211800493562e-06, "loss": 0.7599, "step": 6362 }, { "epoch": 1.7553103448275862, "grad_norm": 4.144218921661377, "learning_rate": 7.720414647140078e-06, "loss": 0.8801, "step": 6363 }, { "epoch": 1.7555862068965518, "grad_norm": 4.08605432510376, "learning_rate": 7.71964915435707e-06, "loss": 0.836, "step": 6364 }, { "epoch": 1.7558620689655173, "grad_norm": 3.9155383110046387, "learning_rate": 7.718883571032659e-06, "loss": 0.8241, "step": 6365 }, { "epoch": 1.7561379310344827, "grad_norm": 3.424365997314453, "learning_rate": 7.71811789719233e-06, "loss": 0.7099, "step": 6366 }, { "epoch": 1.7564137931034483, "grad_norm": 4.014786243438721, "learning_rate": 7.717352132861577e-06, "loss": 0.8941, "step": 6367 }, { "epoch": 1.7566896551724138, "grad_norm": 3.9755020141601562, "learning_rate": 7.716586278065895e-06, "loss": 0.9586, "step": 6368 }, { "epoch": 1.7569655172413792, "grad_norm": 4.518918037414551, "learning_rate": 7.715820332830776e-06, "loss": 0.9128, "step": 6369 }, { "epoch": 1.757241379310345, "grad_norm": 3.5989625453948975, "learning_rate": 7.715054297181724e-06, "loss": 0.7655, "step": 6370 }, { "epoch": 1.7575172413793103, "grad_norm": 3.9791507720947266, "learning_rate": 7.714288171144236e-06, "loss": 0.8252, "step": 6371 }, { "epoch": 1.7577931034482759, "grad_norm": 4.3890509605407715, "learning_rate": 7.713521954743823e-06, "loss": 0.9315, "step": 6372 }, { "epoch": 1.7580689655172415, "grad_norm": 4.397753715515137, "learning_rate": 7.71275564800599e-06, "loss": 0.7162, "step": 6373 }, { "epoch": 1.7583448275862068, "grad_norm": 3.568319797515869, "learning_rate": 7.711989250956252e-06, "loss": 0.7707, "step": 6374 }, { "epoch": 1.7586206896551724, "grad_norm": 3.870415210723877, "learning_rate": 7.71122276362012e-06, "loss": 0.7543, "step": 6375 }, { "epoch": 1.758896551724138, "grad_norm": 3.690995931625366, "learning_rate": 7.71045618602311e-06, "loss": 0.6802, "step": 6376 }, { "epoch": 1.7591724137931033, "grad_norm": 4.07734489440918, "learning_rate": 7.709689518190749e-06, "loss": 0.7039, "step": 6377 }, { "epoch": 1.759448275862069, "grad_norm": 4.098386287689209, "learning_rate": 7.708922760148556e-06, "loss": 0.7895, "step": 6378 }, { "epoch": 1.7597241379310344, "grad_norm": 3.700925350189209, "learning_rate": 7.70815591192206e-06, "loss": 0.8025, "step": 6379 }, { "epoch": 1.76, "grad_norm": 4.404358386993408, "learning_rate": 7.707388973536788e-06, "loss": 0.9462, "step": 6380 }, { "epoch": 1.7602758620689656, "grad_norm": 4.526285648345947, "learning_rate": 7.706621945018273e-06, "loss": 0.8004, "step": 6381 }, { "epoch": 1.760551724137931, "grad_norm": 3.7212064266204834, "learning_rate": 7.705854826392055e-06, "loss": 0.7757, "step": 6382 }, { "epoch": 1.7608275862068965, "grad_norm": 4.313683986663818, "learning_rate": 7.705087617683667e-06, "loss": 0.7588, "step": 6383 }, { "epoch": 1.761103448275862, "grad_norm": 3.848874568939209, "learning_rate": 7.704320318918652e-06, "loss": 0.8245, "step": 6384 }, { "epoch": 1.7613793103448274, "grad_norm": 3.7747554779052734, "learning_rate": 7.703552930122555e-06, "loss": 0.8381, "step": 6385 }, { "epoch": 1.7616551724137932, "grad_norm": 3.66782546043396, "learning_rate": 7.702785451320924e-06, "loss": 0.755, "step": 6386 }, { "epoch": 1.7619310344827586, "grad_norm": 3.8907206058502197, "learning_rate": 7.70201788253931e-06, "loss": 0.831, "step": 6387 }, { "epoch": 1.7622068965517241, "grad_norm": 3.808206081390381, "learning_rate": 7.701250223803266e-06, "loss": 0.863, "step": 6388 }, { "epoch": 1.7624827586206897, "grad_norm": 3.9571845531463623, "learning_rate": 7.700482475138347e-06, "loss": 0.7815, "step": 6389 }, { "epoch": 1.762758620689655, "grad_norm": 4.409331798553467, "learning_rate": 7.699714636570115e-06, "loss": 0.8551, "step": 6390 }, { "epoch": 1.7630344827586208, "grad_norm": 3.7922608852386475, "learning_rate": 7.698946708124132e-06, "loss": 0.7728, "step": 6391 }, { "epoch": 1.7633103448275862, "grad_norm": 3.9093105792999268, "learning_rate": 7.698178689825965e-06, "loss": 0.8588, "step": 6392 }, { "epoch": 1.7635862068965518, "grad_norm": 4.10587215423584, "learning_rate": 7.697410581701178e-06, "loss": 0.9328, "step": 6393 }, { "epoch": 1.7638620689655173, "grad_norm": 3.9071671962738037, "learning_rate": 7.696642383775346e-06, "loss": 0.8639, "step": 6394 }, { "epoch": 1.7641379310344827, "grad_norm": 3.57787823677063, "learning_rate": 7.695874096074045e-06, "loss": 0.7629, "step": 6395 }, { "epoch": 1.7644137931034483, "grad_norm": 3.8331491947174072, "learning_rate": 7.695105718622848e-06, "loss": 0.6785, "step": 6396 }, { "epoch": 1.7646896551724138, "grad_norm": 3.8031187057495117, "learning_rate": 7.694337251447337e-06, "loss": 0.8268, "step": 6397 }, { "epoch": 1.7649655172413792, "grad_norm": 3.452885389328003, "learning_rate": 7.693568694573096e-06, "loss": 0.7113, "step": 6398 }, { "epoch": 1.765241379310345, "grad_norm": 3.7696049213409424, "learning_rate": 7.692800048025715e-06, "loss": 0.745, "step": 6399 }, { "epoch": 1.7655172413793103, "grad_norm": 4.114344120025635, "learning_rate": 7.692031311830778e-06, "loss": 0.7493, "step": 6400 }, { "epoch": 1.765793103448276, "grad_norm": 3.915532350540161, "learning_rate": 7.691262486013881e-06, "loss": 0.8048, "step": 6401 }, { "epoch": 1.7660689655172415, "grad_norm": 3.6929168701171875, "learning_rate": 7.690493570600617e-06, "loss": 0.7426, "step": 6402 }, { "epoch": 1.7663448275862068, "grad_norm": 3.8093619346618652, "learning_rate": 7.689724565616585e-06, "loss": 0.7459, "step": 6403 }, { "epoch": 1.7666206896551724, "grad_norm": 4.177196025848389, "learning_rate": 7.688955471087388e-06, "loss": 0.9052, "step": 6404 }, { "epoch": 1.766896551724138, "grad_norm": 4.302940845489502, "learning_rate": 7.688186287038628e-06, "loss": 0.9004, "step": 6405 }, { "epoch": 1.7671724137931033, "grad_norm": 4.06972599029541, "learning_rate": 7.687417013495913e-06, "loss": 0.7881, "step": 6406 }, { "epoch": 1.767448275862069, "grad_norm": 4.701390743255615, "learning_rate": 7.686647650484854e-06, "loss": 0.8234, "step": 6407 }, { "epoch": 1.7677241379310344, "grad_norm": 3.9479756355285645, "learning_rate": 7.685878198031065e-06, "loss": 0.7474, "step": 6408 }, { "epoch": 1.768, "grad_norm": 4.105659008026123, "learning_rate": 7.685108656160163e-06, "loss": 0.9114, "step": 6409 }, { "epoch": 1.7682758620689656, "grad_norm": 4.477279186248779, "learning_rate": 7.684339024897765e-06, "loss": 1.0087, "step": 6410 }, { "epoch": 1.768551724137931, "grad_norm": 3.836247682571411, "learning_rate": 7.683569304269492e-06, "loss": 0.8199, "step": 6411 }, { "epoch": 1.7688275862068965, "grad_norm": 3.788447856903076, "learning_rate": 7.682799494300972e-06, "loss": 0.8212, "step": 6412 }, { "epoch": 1.769103448275862, "grad_norm": 4.637674331665039, "learning_rate": 7.682029595017834e-06, "loss": 0.7754, "step": 6413 }, { "epoch": 1.7693793103448274, "grad_norm": 3.7401533126831055, "learning_rate": 7.681259606445705e-06, "loss": 0.8638, "step": 6414 }, { "epoch": 1.7696551724137932, "grad_norm": 3.7473156452178955, "learning_rate": 7.680489528610225e-06, "loss": 0.8273, "step": 6415 }, { "epoch": 1.7699310344827586, "grad_norm": 3.499037265777588, "learning_rate": 7.679719361537024e-06, "loss": 0.7709, "step": 6416 }, { "epoch": 1.7702068965517241, "grad_norm": 4.047844886779785, "learning_rate": 7.678949105251748e-06, "loss": 0.9128, "step": 6417 }, { "epoch": 1.7704827586206897, "grad_norm": 3.923696994781494, "learning_rate": 7.678178759780038e-06, "loss": 0.771, "step": 6418 }, { "epoch": 1.770758620689655, "grad_norm": 3.857820510864258, "learning_rate": 7.677408325147538e-06, "loss": 0.7711, "step": 6419 }, { "epoch": 1.7710344827586209, "grad_norm": 4.0239481925964355, "learning_rate": 7.6766378013799e-06, "loss": 0.7373, "step": 6420 }, { "epoch": 1.7713103448275862, "grad_norm": 4.027899265289307, "learning_rate": 7.675867188502776e-06, "loss": 0.8197, "step": 6421 }, { "epoch": 1.7715862068965518, "grad_norm": 3.988579034805298, "learning_rate": 7.67509648654182e-06, "loss": 0.8439, "step": 6422 }, { "epoch": 1.7718620689655173, "grad_norm": 3.780156135559082, "learning_rate": 7.674325695522687e-06, "loss": 0.9732, "step": 6423 }, { "epoch": 1.7721379310344827, "grad_norm": 3.8185975551605225, "learning_rate": 7.673554815471042e-06, "loss": 0.8329, "step": 6424 }, { "epoch": 1.7724137931034483, "grad_norm": 4.584541320800781, "learning_rate": 7.672783846412545e-06, "loss": 0.9091, "step": 6425 }, { "epoch": 1.7726896551724138, "grad_norm": 3.777644157409668, "learning_rate": 7.67201278837287e-06, "loss": 0.694, "step": 6426 }, { "epoch": 1.7729655172413792, "grad_norm": 4.0268964767456055, "learning_rate": 7.671241641377679e-06, "loss": 0.8663, "step": 6427 }, { "epoch": 1.773241379310345, "grad_norm": 4.164525508880615, "learning_rate": 7.670470405452647e-06, "loss": 0.8574, "step": 6428 }, { "epoch": 1.7735172413793103, "grad_norm": 3.7950897216796875, "learning_rate": 7.66969908062345e-06, "loss": 0.735, "step": 6429 }, { "epoch": 1.773793103448276, "grad_norm": 4.289608001708984, "learning_rate": 7.66892766691577e-06, "loss": 0.807, "step": 6430 }, { "epoch": 1.7740689655172415, "grad_norm": 4.183993816375732, "learning_rate": 7.66815616435528e-06, "loss": 0.7682, "step": 6431 }, { "epoch": 1.7743448275862068, "grad_norm": 3.9108808040618896, "learning_rate": 7.667384572967675e-06, "loss": 0.6984, "step": 6432 }, { "epoch": 1.7746206896551724, "grad_norm": 3.600713014602661, "learning_rate": 7.666612892778634e-06, "loss": 0.7411, "step": 6433 }, { "epoch": 1.774896551724138, "grad_norm": 3.9475576877593994, "learning_rate": 7.665841123813851e-06, "loss": 0.7393, "step": 6434 }, { "epoch": 1.7751724137931033, "grad_norm": 3.9023849964141846, "learning_rate": 7.66506926609902e-06, "loss": 0.7389, "step": 6435 }, { "epoch": 1.775448275862069, "grad_norm": 3.844104051589966, "learning_rate": 7.664297319659837e-06, "loss": 0.8518, "step": 6436 }, { "epoch": 1.7757241379310345, "grad_norm": 4.0032267570495605, "learning_rate": 7.663525284522e-06, "loss": 0.8932, "step": 6437 }, { "epoch": 1.776, "grad_norm": 3.7936346530914307, "learning_rate": 7.662753160711214e-06, "loss": 0.8421, "step": 6438 }, { "epoch": 1.7762758620689656, "grad_norm": 3.741689443588257, "learning_rate": 7.66198094825318e-06, "loss": 0.8363, "step": 6439 }, { "epoch": 1.776551724137931, "grad_norm": 3.766798257827759, "learning_rate": 7.66120864717361e-06, "loss": 0.7353, "step": 6440 }, { "epoch": 1.7768275862068965, "grad_norm": 4.01901912689209, "learning_rate": 7.660436257498213e-06, "loss": 0.7491, "step": 6441 }, { "epoch": 1.777103448275862, "grad_norm": 3.924466609954834, "learning_rate": 7.659663779252704e-06, "loss": 0.7897, "step": 6442 }, { "epoch": 1.7773793103448274, "grad_norm": 3.937551975250244, "learning_rate": 7.6588912124628e-06, "loss": 0.6998, "step": 6443 }, { "epoch": 1.7776551724137932, "grad_norm": 3.8650362491607666, "learning_rate": 7.65811855715422e-06, "loss": 0.7823, "step": 6444 }, { "epoch": 1.7779310344827586, "grad_norm": 3.503666877746582, "learning_rate": 7.657345813352688e-06, "loss": 0.8213, "step": 6445 }, { "epoch": 1.7782068965517241, "grad_norm": 4.14503812789917, "learning_rate": 7.65657298108393e-06, "loss": 0.9768, "step": 6446 }, { "epoch": 1.7784827586206897, "grad_norm": 4.21505069732666, "learning_rate": 7.655800060373675e-06, "loss": 0.8956, "step": 6447 }, { "epoch": 1.778758620689655, "grad_norm": 3.7773005962371826, "learning_rate": 7.655027051247652e-06, "loss": 0.8094, "step": 6448 }, { "epoch": 1.7790344827586206, "grad_norm": 4.266542911529541, "learning_rate": 7.6542539537316e-06, "loss": 0.9402, "step": 6449 }, { "epoch": 1.7793103448275862, "grad_norm": 3.8827335834503174, "learning_rate": 7.653480767851251e-06, "loss": 0.8163, "step": 6450 }, { "epoch": 1.7795862068965516, "grad_norm": 3.799964666366577, "learning_rate": 7.652707493632351e-06, "loss": 0.7726, "step": 6451 }, { "epoch": 1.7798620689655174, "grad_norm": 3.8243768215179443, "learning_rate": 7.651934131100643e-06, "loss": 0.7361, "step": 6452 }, { "epoch": 1.7801379310344827, "grad_norm": 4.253640174865723, "learning_rate": 7.65116068028187e-06, "loss": 0.8584, "step": 6453 }, { "epoch": 1.7804137931034483, "grad_norm": 4.08327054977417, "learning_rate": 7.650387141201783e-06, "loss": 0.8987, "step": 6454 }, { "epoch": 1.7806896551724138, "grad_norm": 3.90079402923584, "learning_rate": 7.649613513886137e-06, "loss": 0.6719, "step": 6455 }, { "epoch": 1.7809655172413792, "grad_norm": 4.172662258148193, "learning_rate": 7.648839798360681e-06, "loss": 0.8212, "step": 6456 }, { "epoch": 1.781241379310345, "grad_norm": 3.917954206466675, "learning_rate": 7.648065994651181e-06, "loss": 0.7767, "step": 6457 }, { "epoch": 1.7815172413793103, "grad_norm": 4.0279998779296875, "learning_rate": 7.647292102783391e-06, "loss": 0.8003, "step": 6458 }, { "epoch": 1.781793103448276, "grad_norm": 4.332919597625732, "learning_rate": 7.64651812278308e-06, "loss": 0.7746, "step": 6459 }, { "epoch": 1.7820689655172415, "grad_norm": 3.5438578128814697, "learning_rate": 7.645744054676015e-06, "loss": 0.6979, "step": 6460 }, { "epoch": 1.7823448275862068, "grad_norm": 3.8255581855773926, "learning_rate": 7.644969898487962e-06, "loss": 0.8118, "step": 6461 }, { "epoch": 1.7826206896551724, "grad_norm": 3.6625800132751465, "learning_rate": 7.644195654244694e-06, "loss": 0.8076, "step": 6462 }, { "epoch": 1.782896551724138, "grad_norm": 3.916191339492798, "learning_rate": 7.643421321971993e-06, "loss": 0.8021, "step": 6463 }, { "epoch": 1.7831724137931033, "grad_norm": 4.092611312866211, "learning_rate": 7.642646901695634e-06, "loss": 0.8382, "step": 6464 }, { "epoch": 1.783448275862069, "grad_norm": 4.322846412658691, "learning_rate": 7.641872393441397e-06, "loss": 0.8283, "step": 6465 }, { "epoch": 1.7837241379310345, "grad_norm": 4.045248508453369, "learning_rate": 7.641097797235066e-06, "loss": 0.8891, "step": 6466 }, { "epoch": 1.784, "grad_norm": 4.3640947341918945, "learning_rate": 7.640323113102434e-06, "loss": 0.7937, "step": 6467 }, { "epoch": 1.7842758620689656, "grad_norm": 3.815119504928589, "learning_rate": 7.639548341069287e-06, "loss": 0.6696, "step": 6468 }, { "epoch": 1.784551724137931, "grad_norm": 3.759323835372925, "learning_rate": 7.63877348116142e-06, "loss": 0.7346, "step": 6469 }, { "epoch": 1.7848275862068965, "grad_norm": 3.509273052215576, "learning_rate": 7.637998533404624e-06, "loss": 0.7091, "step": 6470 }, { "epoch": 1.785103448275862, "grad_norm": 3.970604658126831, "learning_rate": 7.637223497824707e-06, "loss": 0.9241, "step": 6471 }, { "epoch": 1.7853793103448274, "grad_norm": 4.153894901275635, "learning_rate": 7.636448374447465e-06, "loss": 0.8226, "step": 6472 }, { "epoch": 1.7856551724137932, "grad_norm": 3.747755765914917, "learning_rate": 7.635673163298708e-06, "loss": 0.7624, "step": 6473 }, { "epoch": 1.7859310344827586, "grad_norm": 4.442868709564209, "learning_rate": 7.63489786440424e-06, "loss": 0.8632, "step": 6474 }, { "epoch": 1.7862068965517242, "grad_norm": 3.809257984161377, "learning_rate": 7.634122477789871e-06, "loss": 0.8544, "step": 6475 }, { "epoch": 1.7864827586206897, "grad_norm": 3.747497797012329, "learning_rate": 7.63334700348142e-06, "loss": 0.7374, "step": 6476 }, { "epoch": 1.786758620689655, "grad_norm": 4.132190227508545, "learning_rate": 7.6325714415047e-06, "loss": 0.8579, "step": 6477 }, { "epoch": 1.7870344827586206, "grad_norm": 4.305112838745117, "learning_rate": 7.63179579188553e-06, "loss": 0.8486, "step": 6478 }, { "epoch": 1.7873103448275862, "grad_norm": 3.479017734527588, "learning_rate": 7.631020054649734e-06, "loss": 0.7263, "step": 6479 }, { "epoch": 1.7875862068965516, "grad_norm": 4.073398113250732, "learning_rate": 7.630244229823138e-06, "loss": 0.7917, "step": 6480 }, { "epoch": 1.7878620689655174, "grad_norm": 3.7601840496063232, "learning_rate": 7.62946831743157e-06, "loss": 0.8517, "step": 6481 }, { "epoch": 1.7881379310344827, "grad_norm": 3.6755261421203613, "learning_rate": 7.628692317500861e-06, "loss": 0.6965, "step": 6482 }, { "epoch": 1.7884137931034483, "grad_norm": 3.788632869720459, "learning_rate": 7.6279162300568444e-06, "loss": 0.8267, "step": 6483 }, { "epoch": 1.7886896551724139, "grad_norm": 4.254398822784424, "learning_rate": 7.627140055125361e-06, "loss": 0.8836, "step": 6484 }, { "epoch": 1.7889655172413792, "grad_norm": 3.8231863975524902, "learning_rate": 7.626363792732247e-06, "loss": 0.8416, "step": 6485 }, { "epoch": 1.789241379310345, "grad_norm": 3.818857192993164, "learning_rate": 7.625587442903346e-06, "loss": 0.7728, "step": 6486 }, { "epoch": 1.7895172413793103, "grad_norm": 4.4213433265686035, "learning_rate": 7.624811005664505e-06, "loss": 0.9044, "step": 6487 }, { "epoch": 1.789793103448276, "grad_norm": 3.92439603805542, "learning_rate": 7.6240344810415734e-06, "loss": 0.7456, "step": 6488 }, { "epoch": 1.7900689655172415, "grad_norm": 3.733288049697876, "learning_rate": 7.623257869060401e-06, "loss": 0.8543, "step": 6489 }, { "epoch": 1.7903448275862068, "grad_norm": 3.6669423580169678, "learning_rate": 7.622481169746845e-06, "loss": 0.7731, "step": 6490 }, { "epoch": 1.7906206896551724, "grad_norm": 4.070236682891846, "learning_rate": 7.621704383126759e-06, "loss": 0.8534, "step": 6491 }, { "epoch": 1.790896551724138, "grad_norm": 3.9048328399658203, "learning_rate": 7.620927509226009e-06, "loss": 0.8085, "step": 6492 }, { "epoch": 1.7911724137931033, "grad_norm": 4.583340644836426, "learning_rate": 7.62015054807045e-06, "loss": 0.8163, "step": 6493 }, { "epoch": 1.7914482758620691, "grad_norm": 3.9055867195129395, "learning_rate": 7.619373499685958e-06, "loss": 0.7904, "step": 6494 }, { "epoch": 1.7917241379310345, "grad_norm": 3.9563302993774414, "learning_rate": 7.6185963640983966e-06, "loss": 0.8571, "step": 6495 }, { "epoch": 1.792, "grad_norm": 4.08488655090332, "learning_rate": 7.617819141333639e-06, "loss": 0.731, "step": 6496 }, { "epoch": 1.7922758620689656, "grad_norm": 3.5096962451934814, "learning_rate": 7.617041831417558e-06, "loss": 0.7781, "step": 6497 }, { "epoch": 1.792551724137931, "grad_norm": 4.025558948516846, "learning_rate": 7.616264434376035e-06, "loss": 0.8214, "step": 6498 }, { "epoch": 1.7928275862068965, "grad_norm": 3.8341221809387207, "learning_rate": 7.61548695023495e-06, "loss": 0.9846, "step": 6499 }, { "epoch": 1.793103448275862, "grad_norm": 3.9657130241394043, "learning_rate": 7.614709379020184e-06, "loss": 0.8644, "step": 6500 }, { "epoch": 1.793103448275862, "eval_loss": 1.2544188499450684, "eval_runtime": 14.0297, "eval_samples_per_second": 28.511, "eval_steps_per_second": 3.564, "step": 6500 }, { "epoch": 1.7933793103448274, "grad_norm": 3.8921053409576416, "learning_rate": 7.613931720757628e-06, "loss": 0.8076, "step": 6501 }, { "epoch": 1.7936551724137932, "grad_norm": 3.9615728855133057, "learning_rate": 7.613153975473167e-06, "loss": 0.8713, "step": 6502 }, { "epoch": 1.7939310344827586, "grad_norm": 3.6973519325256348, "learning_rate": 7.612376143192697e-06, "loss": 0.6935, "step": 6503 }, { "epoch": 1.7942068965517242, "grad_norm": 3.8775949478149414, "learning_rate": 7.611598223942111e-06, "loss": 0.8678, "step": 6504 }, { "epoch": 1.7944827586206897, "grad_norm": 4.242443084716797, "learning_rate": 7.6108202177473075e-06, "loss": 0.965, "step": 6505 }, { "epoch": 1.794758620689655, "grad_norm": 3.8485004901885986, "learning_rate": 7.610042124634187e-06, "loss": 0.7301, "step": 6506 }, { "epoch": 1.7950344827586207, "grad_norm": 4.114335060119629, "learning_rate": 7.609263944628655e-06, "loss": 0.8563, "step": 6507 }, { "epoch": 1.7953103448275862, "grad_norm": 3.5725820064544678, "learning_rate": 7.608485677756619e-06, "loss": 0.793, "step": 6508 }, { "epoch": 1.7955862068965516, "grad_norm": 4.233544826507568, "learning_rate": 7.607707324043985e-06, "loss": 0.8487, "step": 6509 }, { "epoch": 1.7958620689655174, "grad_norm": 3.954923629760742, "learning_rate": 7.606928883516669e-06, "loss": 0.8099, "step": 6510 }, { "epoch": 1.7961379310344827, "grad_norm": 3.7818169593811035, "learning_rate": 7.606150356200586e-06, "loss": 0.8562, "step": 6511 }, { "epoch": 1.7964137931034483, "grad_norm": 4.2931294441223145, "learning_rate": 7.605371742121652e-06, "loss": 0.7185, "step": 6512 }, { "epoch": 1.7966896551724139, "grad_norm": 3.691678047180176, "learning_rate": 7.604593041305792e-06, "loss": 0.7351, "step": 6513 }, { "epoch": 1.7969655172413792, "grad_norm": 3.959263324737549, "learning_rate": 7.603814253778928e-06, "loss": 0.7433, "step": 6514 }, { "epoch": 1.7972413793103448, "grad_norm": 3.7088136672973633, "learning_rate": 7.603035379566987e-06, "loss": 0.7325, "step": 6515 }, { "epoch": 1.7975172413793103, "grad_norm": 3.5498061180114746, "learning_rate": 7.6022564186959e-06, "loss": 0.6678, "step": 6516 }, { "epoch": 1.7977931034482757, "grad_norm": 3.7019472122192383, "learning_rate": 7.601477371191599e-06, "loss": 0.7322, "step": 6517 }, { "epoch": 1.7980689655172415, "grad_norm": 3.988801956176758, "learning_rate": 7.600698237080019e-06, "loss": 0.7356, "step": 6518 }, { "epoch": 1.7983448275862068, "grad_norm": 4.149747371673584, "learning_rate": 7.5999190163871e-06, "loss": 0.8969, "step": 6519 }, { "epoch": 1.7986206896551724, "grad_norm": 3.724348306655884, "learning_rate": 7.599139709138783e-06, "loss": 0.6817, "step": 6520 }, { "epoch": 1.798896551724138, "grad_norm": 4.153440952301025, "learning_rate": 7.598360315361013e-06, "loss": 0.8268, "step": 6521 }, { "epoch": 1.7991724137931033, "grad_norm": 4.085854530334473, "learning_rate": 7.597580835079736e-06, "loss": 0.8393, "step": 6522 }, { "epoch": 1.7994482758620691, "grad_norm": 3.840043544769287, "learning_rate": 7.596801268320903e-06, "loss": 0.8581, "step": 6523 }, { "epoch": 1.7997241379310345, "grad_norm": 4.367500305175781, "learning_rate": 7.5960216151104674e-06, "loss": 0.8584, "step": 6524 }, { "epoch": 1.8, "grad_norm": 3.7762560844421387, "learning_rate": 7.595241875474384e-06, "loss": 0.7861, "step": 6525 }, { "epoch": 1.8002758620689656, "grad_norm": 3.9665145874023438, "learning_rate": 7.594462049438611e-06, "loss": 0.7029, "step": 6526 }, { "epoch": 1.800551724137931, "grad_norm": 3.7618632316589355, "learning_rate": 7.593682137029111e-06, "loss": 0.7406, "step": 6527 }, { "epoch": 1.8008275862068965, "grad_norm": 4.112208843231201, "learning_rate": 7.592902138271849e-06, "loss": 0.7704, "step": 6528 }, { "epoch": 1.801103448275862, "grad_norm": 3.775134563446045, "learning_rate": 7.592122053192792e-06, "loss": 0.8334, "step": 6529 }, { "epoch": 1.8013793103448275, "grad_norm": 3.84723162651062, "learning_rate": 7.5913418818179105e-06, "loss": 0.8321, "step": 6530 }, { "epoch": 1.8016551724137932, "grad_norm": 3.9755563735961914, "learning_rate": 7.590561624173176e-06, "loss": 0.9433, "step": 6531 }, { "epoch": 1.8019310344827586, "grad_norm": 3.8266022205352783, "learning_rate": 7.589781280284567e-06, "loss": 0.6178, "step": 6532 }, { "epoch": 1.8022068965517242, "grad_norm": 3.497809410095215, "learning_rate": 7.589000850178061e-06, "loss": 0.8744, "step": 6533 }, { "epoch": 1.8024827586206897, "grad_norm": 3.880911350250244, "learning_rate": 7.588220333879642e-06, "loss": 0.8311, "step": 6534 }, { "epoch": 1.802758620689655, "grad_norm": 3.975344657897949, "learning_rate": 7.587439731415289e-06, "loss": 0.8591, "step": 6535 }, { "epoch": 1.8030344827586207, "grad_norm": 3.687899112701416, "learning_rate": 7.586659042810993e-06, "loss": 0.7485, "step": 6536 }, { "epoch": 1.8033103448275862, "grad_norm": 4.105549335479736, "learning_rate": 7.585878268092746e-06, "loss": 0.8264, "step": 6537 }, { "epoch": 1.8035862068965516, "grad_norm": 4.170409202575684, "learning_rate": 7.58509740728654e-06, "loss": 0.8684, "step": 6538 }, { "epoch": 1.8038620689655174, "grad_norm": 3.6137688159942627, "learning_rate": 7.58431646041837e-06, "loss": 0.7441, "step": 6539 }, { "epoch": 1.8041379310344827, "grad_norm": 4.656208515167236, "learning_rate": 7.583535427514236e-06, "loss": 0.9377, "step": 6540 }, { "epoch": 1.8044137931034483, "grad_norm": 4.3437724113464355, "learning_rate": 7.582754308600138e-06, "loss": 0.8806, "step": 6541 }, { "epoch": 1.8046896551724139, "grad_norm": 3.6250720024108887, "learning_rate": 7.581973103702085e-06, "loss": 0.7486, "step": 6542 }, { "epoch": 1.8049655172413792, "grad_norm": 4.068323135375977, "learning_rate": 7.581191812846079e-06, "loss": 0.8735, "step": 6543 }, { "epoch": 1.8052413793103448, "grad_norm": 3.557732105255127, "learning_rate": 7.580410436058136e-06, "loss": 0.8171, "step": 6544 }, { "epoch": 1.8055172413793104, "grad_norm": 3.94549822807312, "learning_rate": 7.579628973364263e-06, "loss": 0.8904, "step": 6545 }, { "epoch": 1.8057931034482757, "grad_norm": 3.913954496383667, "learning_rate": 7.578847424790481e-06, "loss": 0.7636, "step": 6546 }, { "epoch": 1.8060689655172415, "grad_norm": 3.9727094173431396, "learning_rate": 7.578065790362807e-06, "loss": 0.8407, "step": 6547 }, { "epoch": 1.8063448275862068, "grad_norm": 3.817474842071533, "learning_rate": 7.577284070107264e-06, "loss": 0.9033, "step": 6548 }, { "epoch": 1.8066206896551724, "grad_norm": 3.824235200881958, "learning_rate": 7.576502264049875e-06, "loss": 0.7588, "step": 6549 }, { "epoch": 1.806896551724138, "grad_norm": 3.44875431060791, "learning_rate": 7.575720372216671e-06, "loss": 0.7029, "step": 6550 }, { "epoch": 1.8071724137931033, "grad_norm": 3.7283570766448975, "learning_rate": 7.5749383946336775e-06, "loss": 0.7985, "step": 6551 }, { "epoch": 1.8074482758620691, "grad_norm": 3.868155002593994, "learning_rate": 7.5741563313269305e-06, "loss": 0.8209, "step": 6552 }, { "epoch": 1.8077241379310345, "grad_norm": 4.048840522766113, "learning_rate": 7.573374182322466e-06, "loss": 0.7625, "step": 6553 }, { "epoch": 1.808, "grad_norm": 3.8491334915161133, "learning_rate": 7.572591947646322e-06, "loss": 0.7377, "step": 6554 }, { "epoch": 1.8082758620689656, "grad_norm": 3.64142107963562, "learning_rate": 7.571809627324542e-06, "loss": 0.8429, "step": 6555 }, { "epoch": 1.808551724137931, "grad_norm": 3.7606399059295654, "learning_rate": 7.571027221383169e-06, "loss": 0.8585, "step": 6556 }, { "epoch": 1.8088275862068965, "grad_norm": 4.020027160644531, "learning_rate": 7.570244729848254e-06, "loss": 0.8115, "step": 6557 }, { "epoch": 1.8091034482758621, "grad_norm": 3.9086146354675293, "learning_rate": 7.569462152745842e-06, "loss": 0.8293, "step": 6558 }, { "epoch": 1.8093793103448275, "grad_norm": 3.8995656967163086, "learning_rate": 7.568679490101991e-06, "loss": 0.8179, "step": 6559 }, { "epoch": 1.8096551724137933, "grad_norm": 3.7797913551330566, "learning_rate": 7.567896741942753e-06, "loss": 0.7031, "step": 6560 }, { "epoch": 1.8099310344827586, "grad_norm": 3.660829782485962, "learning_rate": 7.567113908294191e-06, "loss": 0.6804, "step": 6561 }, { "epoch": 1.8102068965517242, "grad_norm": 4.09998083114624, "learning_rate": 7.566330989182362e-06, "loss": 0.818, "step": 6562 }, { "epoch": 1.8104827586206897, "grad_norm": 3.893860340118408, "learning_rate": 7.565547984633337e-06, "loss": 0.8859, "step": 6563 }, { "epoch": 1.810758620689655, "grad_norm": 3.9970450401306152, "learning_rate": 7.564764894673177e-06, "loss": 0.8023, "step": 6564 }, { "epoch": 1.8110344827586207, "grad_norm": 4.037509441375732, "learning_rate": 7.563981719327955e-06, "loss": 0.9368, "step": 6565 }, { "epoch": 1.8113103448275862, "grad_norm": 3.924433708190918, "learning_rate": 7.563198458623744e-06, "loss": 0.7631, "step": 6566 }, { "epoch": 1.8115862068965516, "grad_norm": 4.289885997772217, "learning_rate": 7.5624151125866215e-06, "loss": 0.885, "step": 6567 }, { "epoch": 1.8118620689655174, "grad_norm": 3.8020668029785156, "learning_rate": 7.5616316812426646e-06, "loss": 0.8293, "step": 6568 }, { "epoch": 1.8121379310344827, "grad_norm": 4.264369964599609, "learning_rate": 7.560848164617956e-06, "loss": 0.794, "step": 6569 }, { "epoch": 1.8124137931034483, "grad_norm": 3.861530065536499, "learning_rate": 7.560064562738579e-06, "loss": 0.7981, "step": 6570 }, { "epoch": 1.8126896551724139, "grad_norm": 3.6362104415893555, "learning_rate": 7.559280875630621e-06, "loss": 0.75, "step": 6571 }, { "epoch": 1.8129655172413792, "grad_norm": 4.143261432647705, "learning_rate": 7.5584971033201724e-06, "loss": 0.8986, "step": 6572 }, { "epoch": 1.8132413793103448, "grad_norm": 4.333508491516113, "learning_rate": 7.557713245833327e-06, "loss": 0.7746, "step": 6573 }, { "epoch": 1.8135172413793104, "grad_norm": 4.164895057678223, "learning_rate": 7.55692930319618e-06, "loss": 0.8038, "step": 6574 }, { "epoch": 1.8137931034482757, "grad_norm": 3.960784912109375, "learning_rate": 7.556145275434831e-06, "loss": 0.8237, "step": 6575 }, { "epoch": 1.8140689655172415, "grad_norm": 3.626159906387329, "learning_rate": 7.555361162575379e-06, "loss": 0.8548, "step": 6576 }, { "epoch": 1.8143448275862069, "grad_norm": 3.9938905239105225, "learning_rate": 7.554576964643933e-06, "loss": 0.7565, "step": 6577 }, { "epoch": 1.8146206896551724, "grad_norm": 3.8717541694641113, "learning_rate": 7.553792681666594e-06, "loss": 0.6875, "step": 6578 }, { "epoch": 1.814896551724138, "grad_norm": 3.9690675735473633, "learning_rate": 7.553008313669479e-06, "loss": 0.8179, "step": 6579 }, { "epoch": 1.8151724137931033, "grad_norm": 3.606736660003662, "learning_rate": 7.552223860678694e-06, "loss": 0.6503, "step": 6580 }, { "epoch": 1.815448275862069, "grad_norm": 3.589916467666626, "learning_rate": 7.5514393227203595e-06, "loss": 0.8019, "step": 6581 }, { "epoch": 1.8157241379310345, "grad_norm": 4.202364444732666, "learning_rate": 7.5506546998205925e-06, "loss": 0.89, "step": 6582 }, { "epoch": 1.8159999999999998, "grad_norm": 4.265813827514648, "learning_rate": 7.5498699920055115e-06, "loss": 0.8329, "step": 6583 }, { "epoch": 1.8162758620689656, "grad_norm": 3.681450128555298, "learning_rate": 7.549085199301245e-06, "loss": 0.8011, "step": 6584 }, { "epoch": 1.816551724137931, "grad_norm": 4.183761119842529, "learning_rate": 7.548300321733918e-06, "loss": 0.7934, "step": 6585 }, { "epoch": 1.8168275862068965, "grad_norm": 4.297820568084717, "learning_rate": 7.547515359329662e-06, "loss": 0.7682, "step": 6586 }, { "epoch": 1.8171034482758621, "grad_norm": 3.4473512172698975, "learning_rate": 7.546730312114608e-06, "loss": 0.7794, "step": 6587 }, { "epoch": 1.8173793103448275, "grad_norm": 3.42354416847229, "learning_rate": 7.545945180114891e-06, "loss": 0.7834, "step": 6588 }, { "epoch": 1.8176551724137933, "grad_norm": 3.361168622970581, "learning_rate": 7.54515996335665e-06, "loss": 0.7203, "step": 6589 }, { "epoch": 1.8179310344827586, "grad_norm": 4.545584678649902, "learning_rate": 7.5443746618660256e-06, "loss": 1.1449, "step": 6590 }, { "epoch": 1.8182068965517242, "grad_norm": 3.8910164833068848, "learning_rate": 7.543589275669163e-06, "loss": 0.7715, "step": 6591 }, { "epoch": 1.8184827586206898, "grad_norm": 4.503053665161133, "learning_rate": 7.542803804792207e-06, "loss": 0.9173, "step": 6592 }, { "epoch": 1.818758620689655, "grad_norm": 3.8496460914611816, "learning_rate": 7.542018249261309e-06, "loss": 0.8437, "step": 6593 }, { "epoch": 1.8190344827586207, "grad_norm": 4.1842780113220215, "learning_rate": 7.541232609102621e-06, "loss": 0.823, "step": 6594 }, { "epoch": 1.8193103448275862, "grad_norm": 3.9437694549560547, "learning_rate": 7.540446884342297e-06, "loss": 0.8997, "step": 6595 }, { "epoch": 1.8195862068965516, "grad_norm": 3.8971641063690186, "learning_rate": 7.539661075006497e-06, "loss": 0.7026, "step": 6596 }, { "epoch": 1.8198620689655174, "grad_norm": 3.937234878540039, "learning_rate": 7.538875181121381e-06, "loss": 0.8645, "step": 6597 }, { "epoch": 1.8201379310344827, "grad_norm": 4.238021373748779, "learning_rate": 7.538089202713112e-06, "loss": 0.9911, "step": 6598 }, { "epoch": 1.8204137931034483, "grad_norm": 4.016236782073975, "learning_rate": 7.537303139807856e-06, "loss": 0.8499, "step": 6599 }, { "epoch": 1.8206896551724139, "grad_norm": 4.092555046081543, "learning_rate": 7.536516992431785e-06, "loss": 0.7293, "step": 6600 }, { "epoch": 1.8209655172413792, "grad_norm": 4.010569095611572, "learning_rate": 7.535730760611068e-06, "loss": 0.8649, "step": 6601 }, { "epoch": 1.8212413793103448, "grad_norm": 3.882451295852661, "learning_rate": 7.53494444437188e-06, "loss": 0.7586, "step": 6602 }, { "epoch": 1.8215172413793104, "grad_norm": 3.912677764892578, "learning_rate": 7.5341580437404e-06, "loss": 0.7062, "step": 6603 }, { "epoch": 1.8217931034482757, "grad_norm": 3.6890218257904053, "learning_rate": 7.53337155874281e-06, "loss": 0.7022, "step": 6604 }, { "epoch": 1.8220689655172415, "grad_norm": 3.8924169540405273, "learning_rate": 7.53258498940529e-06, "loss": 0.8538, "step": 6605 }, { "epoch": 1.8223448275862069, "grad_norm": 4.417942523956299, "learning_rate": 7.5317983357540305e-06, "loss": 0.937, "step": 6606 }, { "epoch": 1.8226206896551724, "grad_norm": 4.241727828979492, "learning_rate": 7.531011597815214e-06, "loss": 0.7978, "step": 6607 }, { "epoch": 1.822896551724138, "grad_norm": 4.15301513671875, "learning_rate": 7.530224775615038e-06, "loss": 0.7662, "step": 6608 }, { "epoch": 1.8231724137931034, "grad_norm": 4.147266864776611, "learning_rate": 7.529437869179696e-06, "loss": 0.7981, "step": 6609 }, { "epoch": 1.823448275862069, "grad_norm": 4.029078960418701, "learning_rate": 7.528650878535382e-06, "loss": 0.7952, "step": 6610 }, { "epoch": 1.8237241379310345, "grad_norm": 4.049752235412598, "learning_rate": 7.5278638037083e-06, "loss": 0.7909, "step": 6611 }, { "epoch": 1.8239999999999998, "grad_norm": 3.7480740547180176, "learning_rate": 7.527076644724649e-06, "loss": 0.6447, "step": 6612 }, { "epoch": 1.8242758620689656, "grad_norm": 4.101796627044678, "learning_rate": 7.52628940161064e-06, "loss": 0.7479, "step": 6613 }, { "epoch": 1.824551724137931, "grad_norm": 3.8044841289520264, "learning_rate": 7.5255020743924785e-06, "loss": 0.877, "step": 6614 }, { "epoch": 1.8248275862068966, "grad_norm": 3.7872133255004883, "learning_rate": 7.524714663096376e-06, "loss": 0.8191, "step": 6615 }, { "epoch": 1.8251034482758621, "grad_norm": 4.222070693969727, "learning_rate": 7.523927167748546e-06, "loss": 0.8237, "step": 6616 }, { "epoch": 1.8253793103448275, "grad_norm": 3.8708536624908447, "learning_rate": 7.523139588375208e-06, "loss": 0.9277, "step": 6617 }, { "epoch": 1.825655172413793, "grad_norm": 3.8068320751190186, "learning_rate": 7.522351925002578e-06, "loss": 0.8993, "step": 6618 }, { "epoch": 1.8259310344827586, "grad_norm": 3.7127742767333984, "learning_rate": 7.521564177656883e-06, "loss": 0.7262, "step": 6619 }, { "epoch": 1.8262068965517242, "grad_norm": 3.742666006088257, "learning_rate": 7.520776346364343e-06, "loss": 0.7921, "step": 6620 }, { "epoch": 1.8264827586206898, "grad_norm": 3.6900253295898438, "learning_rate": 7.51998843115119e-06, "loss": 0.7353, "step": 6621 }, { "epoch": 1.826758620689655, "grad_norm": 3.8161139488220215, "learning_rate": 7.519200432043655e-06, "loss": 0.6973, "step": 6622 }, { "epoch": 1.8270344827586207, "grad_norm": 3.9968113899230957, "learning_rate": 7.5184123490679715e-06, "loss": 0.8794, "step": 6623 }, { "epoch": 1.8273103448275863, "grad_norm": 4.054862976074219, "learning_rate": 7.517624182250374e-06, "loss": 0.7298, "step": 6624 }, { "epoch": 1.8275862068965516, "grad_norm": 4.130169868469238, "learning_rate": 7.516835931617104e-06, "loss": 0.8938, "step": 6625 }, { "epoch": 1.8278620689655174, "grad_norm": 4.8246870040893555, "learning_rate": 7.5160475971944e-06, "loss": 0.7579, "step": 6626 }, { "epoch": 1.8281379310344827, "grad_norm": 4.0135369300842285, "learning_rate": 7.515259179008513e-06, "loss": 0.9173, "step": 6627 }, { "epoch": 1.8284137931034483, "grad_norm": 4.201539516448975, "learning_rate": 7.514470677085686e-06, "loss": 0.8761, "step": 6628 }, { "epoch": 1.8286896551724139, "grad_norm": 3.83268404006958, "learning_rate": 7.513682091452169e-06, "loss": 0.7789, "step": 6629 }, { "epoch": 1.8289655172413792, "grad_norm": 4.012418270111084, "learning_rate": 7.512893422134219e-06, "loss": 0.7658, "step": 6630 }, { "epoch": 1.8292413793103448, "grad_norm": 4.090364933013916, "learning_rate": 7.512104669158089e-06, "loss": 0.9053, "step": 6631 }, { "epoch": 1.8295172413793104, "grad_norm": 4.315858840942383, "learning_rate": 7.5113158325500406e-06, "loss": 0.957, "step": 6632 }, { "epoch": 1.8297931034482757, "grad_norm": 4.216587066650391, "learning_rate": 7.510526912336332e-06, "loss": 0.8917, "step": 6633 }, { "epoch": 1.8300689655172415, "grad_norm": 4.293330192565918, "learning_rate": 7.5097379085432295e-06, "loss": 0.9535, "step": 6634 }, { "epoch": 1.8303448275862069, "grad_norm": 4.104452133178711, "learning_rate": 7.508948821197002e-06, "loss": 0.6835, "step": 6635 }, { "epoch": 1.8306206896551724, "grad_norm": 3.6098756790161133, "learning_rate": 7.508159650323915e-06, "loss": 0.7219, "step": 6636 }, { "epoch": 1.830896551724138, "grad_norm": 3.6063175201416016, "learning_rate": 7.507370395950245e-06, "loss": 0.714, "step": 6637 }, { "epoch": 1.8311724137931034, "grad_norm": 4.001556396484375, "learning_rate": 7.5065810581022665e-06, "loss": 0.7885, "step": 6638 }, { "epoch": 1.831448275862069, "grad_norm": 3.718574047088623, "learning_rate": 7.505791636806257e-06, "loss": 0.7877, "step": 6639 }, { "epoch": 1.8317241379310345, "grad_norm": 4.256497383117676, "learning_rate": 7.505002132088497e-06, "loss": 0.803, "step": 6640 }, { "epoch": 1.8319999999999999, "grad_norm": 3.9030117988586426, "learning_rate": 7.504212543975273e-06, "loss": 0.7859, "step": 6641 }, { "epoch": 1.8322758620689656, "grad_norm": 3.788424491882324, "learning_rate": 7.503422872492871e-06, "loss": 0.8678, "step": 6642 }, { "epoch": 1.832551724137931, "grad_norm": 4.193147659301758, "learning_rate": 7.502633117667577e-06, "loss": 0.8547, "step": 6643 }, { "epoch": 1.8328275862068966, "grad_norm": 3.9550483226776123, "learning_rate": 7.501843279525689e-06, "loss": 0.7388, "step": 6644 }, { "epoch": 1.8331034482758621, "grad_norm": 4.017431735992432, "learning_rate": 7.501053358093495e-06, "loss": 0.8583, "step": 6645 }, { "epoch": 1.8333793103448275, "grad_norm": 3.8868165016174316, "learning_rate": 7.500263353397299e-06, "loss": 0.704, "step": 6646 }, { "epoch": 1.833655172413793, "grad_norm": 4.145535945892334, "learning_rate": 7.4994732654634e-06, "loss": 0.8807, "step": 6647 }, { "epoch": 1.8339310344827586, "grad_norm": 4.077571868896484, "learning_rate": 7.498683094318096e-06, "loss": 0.7029, "step": 6648 }, { "epoch": 1.834206896551724, "grad_norm": 3.503324508666992, "learning_rate": 7.4978928399877e-06, "loss": 0.7004, "step": 6649 }, { "epoch": 1.8344827586206898, "grad_norm": 4.136828899383545, "learning_rate": 7.497102502498519e-06, "loss": 0.8878, "step": 6650 }, { "epoch": 1.8347586206896551, "grad_norm": 4.244620323181152, "learning_rate": 7.496312081876862e-06, "loss": 0.865, "step": 6651 }, { "epoch": 1.8350344827586207, "grad_norm": 4.214423179626465, "learning_rate": 7.495521578149045e-06, "loss": 0.8682, "step": 6652 }, { "epoch": 1.8353103448275863, "grad_norm": 4.0493621826171875, "learning_rate": 7.4947309913413855e-06, "loss": 0.7264, "step": 6653 }, { "epoch": 1.8355862068965516, "grad_norm": 3.9184179306030273, "learning_rate": 7.4939403214802046e-06, "loss": 0.87, "step": 6654 }, { "epoch": 1.8358620689655174, "grad_norm": 3.839104413986206, "learning_rate": 7.493149568591821e-06, "loss": 0.803, "step": 6655 }, { "epoch": 1.8361379310344828, "grad_norm": 3.986577033996582, "learning_rate": 7.492358732702564e-06, "loss": 0.9023, "step": 6656 }, { "epoch": 1.8364137931034483, "grad_norm": 3.8531532287597656, "learning_rate": 7.491567813838758e-06, "loss": 0.8242, "step": 6657 }, { "epoch": 1.836689655172414, "grad_norm": 3.6141843795776367, "learning_rate": 7.490776812026738e-06, "loss": 0.7714, "step": 6658 }, { "epoch": 1.8369655172413792, "grad_norm": 3.6536295413970947, "learning_rate": 7.489985727292834e-06, "loss": 0.8343, "step": 6659 }, { "epoch": 1.8372413793103448, "grad_norm": 3.745598793029785, "learning_rate": 7.4891945596633865e-06, "loss": 0.8762, "step": 6660 }, { "epoch": 1.8375172413793104, "grad_norm": 4.049334526062012, "learning_rate": 7.488403309164731e-06, "loss": 0.7806, "step": 6661 }, { "epoch": 1.8377931034482757, "grad_norm": 3.739473342895508, "learning_rate": 7.487611975823213e-06, "loss": 0.7627, "step": 6662 }, { "epoch": 1.8380689655172415, "grad_norm": 3.8291356563568115, "learning_rate": 7.486820559665173e-06, "loss": 0.8599, "step": 6663 }, { "epoch": 1.8383448275862069, "grad_norm": 3.790297269821167, "learning_rate": 7.486029060716962e-06, "loss": 0.7562, "step": 6664 }, { "epoch": 1.8386206896551724, "grad_norm": 3.320570230484009, "learning_rate": 7.485237479004927e-06, "loss": 0.7766, "step": 6665 }, { "epoch": 1.838896551724138, "grad_norm": 4.186694622039795, "learning_rate": 7.484445814555425e-06, "loss": 0.8077, "step": 6666 }, { "epoch": 1.8391724137931034, "grad_norm": 3.5836215019226074, "learning_rate": 7.483654067394809e-06, "loss": 0.7621, "step": 6667 }, { "epoch": 1.839448275862069, "grad_norm": 3.747816801071167, "learning_rate": 7.482862237549437e-06, "loss": 0.851, "step": 6668 }, { "epoch": 1.8397241379310345, "grad_norm": 4.090845108032227, "learning_rate": 7.482070325045674e-06, "loss": 0.8185, "step": 6669 }, { "epoch": 1.8399999999999999, "grad_norm": 3.6901443004608154, "learning_rate": 7.48127832990988e-06, "loss": 0.7905, "step": 6670 }, { "epoch": 1.8402758620689657, "grad_norm": 3.7737107276916504, "learning_rate": 7.480486252168424e-06, "loss": 0.7514, "step": 6671 }, { "epoch": 1.840551724137931, "grad_norm": 4.30275821685791, "learning_rate": 7.479694091847674e-06, "loss": 0.8825, "step": 6672 }, { "epoch": 1.8408275862068966, "grad_norm": 4.0633625984191895, "learning_rate": 7.478901848974004e-06, "loss": 0.9074, "step": 6673 }, { "epoch": 1.8411034482758621, "grad_norm": 3.6704964637756348, "learning_rate": 7.478109523573786e-06, "loss": 0.7192, "step": 6674 }, { "epoch": 1.8413793103448275, "grad_norm": 4.037412643432617, "learning_rate": 7.477317115673403e-06, "loss": 0.7108, "step": 6675 }, { "epoch": 1.841655172413793, "grad_norm": 4.060011863708496, "learning_rate": 7.47652462529923e-06, "loss": 0.74, "step": 6676 }, { "epoch": 1.8419310344827586, "grad_norm": 3.550677537918091, "learning_rate": 7.475732052477652e-06, "loss": 0.7271, "step": 6677 }, { "epoch": 1.842206896551724, "grad_norm": 4.080298900604248, "learning_rate": 7.474939397235057e-06, "loss": 0.9135, "step": 6678 }, { "epoch": 1.8424827586206898, "grad_norm": 4.008673667907715, "learning_rate": 7.474146659597832e-06, "loss": 0.7963, "step": 6679 }, { "epoch": 1.8427586206896551, "grad_norm": 4.329872131347656, "learning_rate": 7.473353839592368e-06, "loss": 0.7954, "step": 6680 }, { "epoch": 1.8430344827586207, "grad_norm": 4.100995063781738, "learning_rate": 7.472560937245061e-06, "loss": 0.8073, "step": 6681 }, { "epoch": 1.8433103448275863, "grad_norm": 4.3490891456604, "learning_rate": 7.471767952582307e-06, "loss": 0.7835, "step": 6682 }, { "epoch": 1.8435862068965516, "grad_norm": 4.121224880218506, "learning_rate": 7.4709748856305055e-06, "loss": 0.8913, "step": 6683 }, { "epoch": 1.8438620689655172, "grad_norm": 4.238190174102783, "learning_rate": 7.470181736416059e-06, "loss": 0.9717, "step": 6684 }, { "epoch": 1.8441379310344828, "grad_norm": 4.017167568206787, "learning_rate": 7.469388504965373e-06, "loss": 0.8566, "step": 6685 }, { "epoch": 1.8444137931034483, "grad_norm": 3.686068058013916, "learning_rate": 7.468595191304854e-06, "loss": 0.8203, "step": 6686 }, { "epoch": 1.844689655172414, "grad_norm": 3.631291389465332, "learning_rate": 7.467801795460916e-06, "loss": 0.8478, "step": 6687 }, { "epoch": 1.8449655172413792, "grad_norm": 5.0867695808410645, "learning_rate": 7.467008317459968e-06, "loss": 0.8527, "step": 6688 }, { "epoch": 1.8452413793103448, "grad_norm": 3.8479881286621094, "learning_rate": 7.466214757328432e-06, "loss": 0.7964, "step": 6689 }, { "epoch": 1.8455172413793104, "grad_norm": 3.53985857963562, "learning_rate": 7.465421115092719e-06, "loss": 0.6945, "step": 6690 }, { "epoch": 1.8457931034482757, "grad_norm": 3.8370659351348877, "learning_rate": 7.464627390779258e-06, "loss": 0.7059, "step": 6691 }, { "epoch": 1.8460689655172415, "grad_norm": 3.894718647003174, "learning_rate": 7.463833584414468e-06, "loss": 0.7266, "step": 6692 }, { "epoch": 1.8463448275862069, "grad_norm": 3.992672920227051, "learning_rate": 7.4630396960247795e-06, "loss": 0.7412, "step": 6693 }, { "epoch": 1.8466206896551725, "grad_norm": 4.105615139007568, "learning_rate": 7.462245725636622e-06, "loss": 0.8655, "step": 6694 }, { "epoch": 1.846896551724138, "grad_norm": 4.13037109375, "learning_rate": 7.461451673276427e-06, "loss": 0.8029, "step": 6695 }, { "epoch": 1.8471724137931034, "grad_norm": 3.4439539909362793, "learning_rate": 7.460657538970628e-06, "loss": 0.7941, "step": 6696 }, { "epoch": 1.847448275862069, "grad_norm": 4.32697057723999, "learning_rate": 7.459863322745665e-06, "loss": 0.8426, "step": 6697 }, { "epoch": 1.8477241379310345, "grad_norm": 3.611614465713501, "learning_rate": 7.459069024627981e-06, "loss": 0.7474, "step": 6698 }, { "epoch": 1.8479999999999999, "grad_norm": 3.928760051727295, "learning_rate": 7.4582746446440146e-06, "loss": 0.8556, "step": 6699 }, { "epoch": 1.8482758620689657, "grad_norm": 3.685213088989258, "learning_rate": 7.4574801828202155e-06, "loss": 0.6416, "step": 6700 }, { "epoch": 1.848551724137931, "grad_norm": 3.726702928543091, "learning_rate": 7.456685639183031e-06, "loss": 0.753, "step": 6701 }, { "epoch": 1.8488275862068966, "grad_norm": 3.799514055252075, "learning_rate": 7.455891013758914e-06, "loss": 0.7406, "step": 6702 }, { "epoch": 1.8491034482758621, "grad_norm": 4.1471710205078125, "learning_rate": 7.455096306574316e-06, "loss": 0.8619, "step": 6703 }, { "epoch": 1.8493793103448275, "grad_norm": 3.8465278148651123, "learning_rate": 7.454301517655698e-06, "loss": 0.7507, "step": 6704 }, { "epoch": 1.849655172413793, "grad_norm": 3.8401854038238525, "learning_rate": 7.453506647029517e-06, "loss": 0.8241, "step": 6705 }, { "epoch": 1.8499310344827586, "grad_norm": 3.959616184234619, "learning_rate": 7.452711694722236e-06, "loss": 0.7902, "step": 6706 }, { "epoch": 1.850206896551724, "grad_norm": 3.9730260372161865, "learning_rate": 7.451916660760321e-06, "loss": 0.7609, "step": 6707 }, { "epoch": 1.8504827586206898, "grad_norm": 3.7898623943328857, "learning_rate": 7.451121545170241e-06, "loss": 0.938, "step": 6708 }, { "epoch": 1.8507586206896551, "grad_norm": 3.655911684036255, "learning_rate": 7.450326347978462e-06, "loss": 0.7693, "step": 6709 }, { "epoch": 1.8510344827586207, "grad_norm": 4.297811508178711, "learning_rate": 7.449531069211464e-06, "loss": 0.8014, "step": 6710 }, { "epoch": 1.8513103448275863, "grad_norm": 4.115379810333252, "learning_rate": 7.448735708895717e-06, "loss": 0.8502, "step": 6711 }, { "epoch": 1.8515862068965516, "grad_norm": 3.4058945178985596, "learning_rate": 7.447940267057703e-06, "loss": 0.82, "step": 6712 }, { "epoch": 1.8518620689655172, "grad_norm": 4.0341901779174805, "learning_rate": 7.447144743723903e-06, "loss": 0.8529, "step": 6713 }, { "epoch": 1.8521379310344828, "grad_norm": 3.548854351043701, "learning_rate": 7.4463491389208005e-06, "loss": 0.6933, "step": 6714 }, { "epoch": 1.8524137931034481, "grad_norm": 4.230555534362793, "learning_rate": 7.445553452674883e-06, "loss": 0.8013, "step": 6715 }, { "epoch": 1.852689655172414, "grad_norm": 3.7768757343292236, "learning_rate": 7.444757685012641e-06, "loss": 0.779, "step": 6716 }, { "epoch": 1.8529655172413793, "grad_norm": 4.142418384552002, "learning_rate": 7.443961835960565e-06, "loss": 0.8743, "step": 6717 }, { "epoch": 1.8532413793103448, "grad_norm": 4.182827949523926, "learning_rate": 7.443165905545153e-06, "loss": 0.7671, "step": 6718 }, { "epoch": 1.8535172413793104, "grad_norm": 4.435004234313965, "learning_rate": 7.442369893792899e-06, "loss": 0.9055, "step": 6719 }, { "epoch": 1.8537931034482757, "grad_norm": 4.327605247497559, "learning_rate": 7.441573800730307e-06, "loss": 0.8715, "step": 6720 }, { "epoch": 1.8540689655172415, "grad_norm": 3.951897621154785, "learning_rate": 7.440777626383877e-06, "loss": 0.7932, "step": 6721 }, { "epoch": 1.854344827586207, "grad_norm": 3.6438560485839844, "learning_rate": 7.439981370780116e-06, "loss": 0.7518, "step": 6722 }, { "epoch": 1.8546206896551725, "grad_norm": 3.4867119789123535, "learning_rate": 7.439185033945534e-06, "loss": 0.7395, "step": 6723 }, { "epoch": 1.854896551724138, "grad_norm": 4.065187931060791, "learning_rate": 7.438388615906641e-06, "loss": 0.8183, "step": 6724 }, { "epoch": 1.8551724137931034, "grad_norm": 5.166202545166016, "learning_rate": 7.437592116689952e-06, "loss": 0.8565, "step": 6725 }, { "epoch": 1.855448275862069, "grad_norm": 3.469508171081543, "learning_rate": 7.436795536321982e-06, "loss": 0.711, "step": 6726 }, { "epoch": 1.8557241379310345, "grad_norm": 3.74527645111084, "learning_rate": 7.435998874829253e-06, "loss": 0.7407, "step": 6727 }, { "epoch": 1.8559999999999999, "grad_norm": 4.2877607345581055, "learning_rate": 7.435202132238285e-06, "loss": 0.7514, "step": 6728 }, { "epoch": 1.8562758620689657, "grad_norm": 4.306102752685547, "learning_rate": 7.434405308575604e-06, "loss": 0.8802, "step": 6729 }, { "epoch": 1.856551724137931, "grad_norm": 3.8856425285339355, "learning_rate": 7.433608403867736e-06, "loss": 0.6894, "step": 6730 }, { "epoch": 1.8568275862068966, "grad_norm": 4.238690376281738, "learning_rate": 7.432811418141212e-06, "loss": 0.8023, "step": 6731 }, { "epoch": 1.8571034482758622, "grad_norm": 4.024131774902344, "learning_rate": 7.432014351422566e-06, "loss": 0.7567, "step": 6732 }, { "epoch": 1.8573793103448275, "grad_norm": 4.0614142417907715, "learning_rate": 7.431217203738332e-06, "loss": 0.9913, "step": 6733 }, { "epoch": 1.857655172413793, "grad_norm": 3.886984348297119, "learning_rate": 7.4304199751150484e-06, "loss": 0.7676, "step": 6734 }, { "epoch": 1.8579310344827586, "grad_norm": 4.130056858062744, "learning_rate": 7.429622665579259e-06, "loss": 0.7497, "step": 6735 }, { "epoch": 1.858206896551724, "grad_norm": 3.755514621734619, "learning_rate": 7.428825275157503e-06, "loss": 0.761, "step": 6736 }, { "epoch": 1.8584827586206898, "grad_norm": 3.8968398571014404, "learning_rate": 7.428027803876331e-06, "loss": 0.8096, "step": 6737 }, { "epoch": 1.8587586206896551, "grad_norm": 3.642652750015259, "learning_rate": 7.4272302517622885e-06, "loss": 0.759, "step": 6738 }, { "epoch": 1.8590344827586207, "grad_norm": 3.738118886947632, "learning_rate": 7.42643261884193e-06, "loss": 0.6984, "step": 6739 }, { "epoch": 1.8593103448275863, "grad_norm": 3.4886295795440674, "learning_rate": 7.425634905141807e-06, "loss": 0.7649, "step": 6740 }, { "epoch": 1.8595862068965516, "grad_norm": 3.7058422565460205, "learning_rate": 7.42483711068848e-06, "loss": 0.6882, "step": 6741 }, { "epoch": 1.8598620689655172, "grad_norm": 4.241992473602295, "learning_rate": 7.424039235508507e-06, "loss": 0.8533, "step": 6742 }, { "epoch": 1.8601379310344828, "grad_norm": 4.0015692710876465, "learning_rate": 7.42324127962845e-06, "loss": 0.8781, "step": 6743 }, { "epoch": 1.8604137931034481, "grad_norm": 4.021305561065674, "learning_rate": 7.422443243074876e-06, "loss": 0.7389, "step": 6744 }, { "epoch": 1.860689655172414, "grad_norm": 4.158514976501465, "learning_rate": 7.421645125874353e-06, "loss": 0.7954, "step": 6745 }, { "epoch": 1.8609655172413793, "grad_norm": 3.930655002593994, "learning_rate": 7.420846928053449e-06, "loss": 0.7923, "step": 6746 }, { "epoch": 1.8612413793103448, "grad_norm": 3.816549301147461, "learning_rate": 7.420048649638739e-06, "loss": 0.8098, "step": 6747 }, { "epoch": 1.8615172413793104, "grad_norm": 4.013473987579346, "learning_rate": 7.419250290656799e-06, "loss": 0.7253, "step": 6748 }, { "epoch": 1.8617931034482758, "grad_norm": 3.629121780395508, "learning_rate": 7.4184518511342074e-06, "loss": 0.7478, "step": 6749 }, { "epoch": 1.8620689655172413, "grad_norm": 4.386204242706299, "learning_rate": 7.4176533310975455e-06, "loss": 0.8911, "step": 6750 }, { "epoch": 1.862344827586207, "grad_norm": 3.9372804164886475, "learning_rate": 7.416854730573396e-06, "loss": 0.7212, "step": 6751 }, { "epoch": 1.8626206896551725, "grad_norm": 3.8175101280212402, "learning_rate": 7.416056049588348e-06, "loss": 0.7894, "step": 6752 }, { "epoch": 1.862896551724138, "grad_norm": 3.800055503845215, "learning_rate": 7.415257288168989e-06, "loss": 0.7678, "step": 6753 }, { "epoch": 1.8631724137931034, "grad_norm": 4.8801984786987305, "learning_rate": 7.414458446341913e-06, "loss": 1.0155, "step": 6754 }, { "epoch": 1.863448275862069, "grad_norm": 3.884845733642578, "learning_rate": 7.413659524133712e-06, "loss": 0.7953, "step": 6755 }, { "epoch": 1.8637241379310345, "grad_norm": 3.99153470993042, "learning_rate": 7.412860521570986e-06, "loss": 0.6712, "step": 6756 }, { "epoch": 1.8639999999999999, "grad_norm": 3.7417476177215576, "learning_rate": 7.4120614386803324e-06, "loss": 0.8141, "step": 6757 }, { "epoch": 1.8642758620689657, "grad_norm": 4.119503021240234, "learning_rate": 7.411262275488357e-06, "loss": 0.8116, "step": 6758 }, { "epoch": 1.864551724137931, "grad_norm": 4.083722114562988, "learning_rate": 7.410463032021662e-06, "loss": 0.8421, "step": 6759 }, { "epoch": 1.8648275862068966, "grad_norm": 3.487741708755493, "learning_rate": 7.409663708306858e-06, "loss": 0.7311, "step": 6760 }, { "epoch": 1.8651034482758622, "grad_norm": 3.9702932834625244, "learning_rate": 7.408864304370554e-06, "loss": 0.7486, "step": 6761 }, { "epoch": 1.8653793103448275, "grad_norm": 3.7359039783477783, "learning_rate": 7.408064820239365e-06, "loss": 0.8678, "step": 6762 }, { "epoch": 1.865655172413793, "grad_norm": 3.8178117275238037, "learning_rate": 7.407265255939906e-06, "loss": 0.8957, "step": 6763 }, { "epoch": 1.8659310344827587, "grad_norm": 4.290638446807861, "learning_rate": 7.4064656114987966e-06, "loss": 0.7499, "step": 6764 }, { "epoch": 1.866206896551724, "grad_norm": 3.9652419090270996, "learning_rate": 7.405665886942656e-06, "loss": 0.8153, "step": 6765 }, { "epoch": 1.8664827586206898, "grad_norm": 3.817636728286743, "learning_rate": 7.404866082298112e-06, "loss": 0.9187, "step": 6766 }, { "epoch": 1.8667586206896551, "grad_norm": 3.6118173599243164, "learning_rate": 7.404066197591788e-06, "loss": 0.7794, "step": 6767 }, { "epoch": 1.8670344827586207, "grad_norm": 3.9794087409973145, "learning_rate": 7.4032662328503154e-06, "loss": 0.8648, "step": 6768 }, { "epoch": 1.8673103448275863, "grad_norm": 3.777522563934326, "learning_rate": 7.4024661881003256e-06, "loss": 0.8191, "step": 6769 }, { "epoch": 1.8675862068965516, "grad_norm": 4.297286033630371, "learning_rate": 7.401666063368453e-06, "loss": 1.0087, "step": 6770 }, { "epoch": 1.8678620689655172, "grad_norm": 4.385587692260742, "learning_rate": 7.400865858681334e-06, "loss": 0.9522, "step": 6771 }, { "epoch": 1.8681379310344828, "grad_norm": 3.5791521072387695, "learning_rate": 7.400065574065612e-06, "loss": 0.6687, "step": 6772 }, { "epoch": 1.8684137931034481, "grad_norm": 3.767981767654419, "learning_rate": 7.399265209547927e-06, "loss": 0.9181, "step": 6773 }, { "epoch": 1.868689655172414, "grad_norm": 3.66853928565979, "learning_rate": 7.398464765154926e-06, "loss": 0.7856, "step": 6774 }, { "epoch": 1.8689655172413793, "grad_norm": 4.011200428009033, "learning_rate": 7.397664240913255e-06, "loss": 0.7565, "step": 6775 }, { "epoch": 1.8692413793103448, "grad_norm": 3.6440744400024414, "learning_rate": 7.3968636368495665e-06, "loss": 0.7952, "step": 6776 }, { "epoch": 1.8695172413793104, "grad_norm": 4.004151344299316, "learning_rate": 7.396062952990514e-06, "loss": 0.8314, "step": 6777 }, { "epoch": 1.8697931034482758, "grad_norm": 3.7912468910217285, "learning_rate": 7.395262189362752e-06, "loss": 0.9332, "step": 6778 }, { "epoch": 1.8700689655172413, "grad_norm": 3.894937753677368, "learning_rate": 7.394461345992939e-06, "loss": 0.825, "step": 6779 }, { "epoch": 1.870344827586207, "grad_norm": 3.9124069213867188, "learning_rate": 7.393660422907738e-06, "loss": 0.7913, "step": 6780 }, { "epoch": 1.8706206896551723, "grad_norm": 3.6079745292663574, "learning_rate": 7.392859420133814e-06, "loss": 0.7926, "step": 6781 }, { "epoch": 1.870896551724138, "grad_norm": 4.006809711456299, "learning_rate": 7.3920583376978305e-06, "loss": 0.8106, "step": 6782 }, { "epoch": 1.8711724137931034, "grad_norm": 3.693315267562866, "learning_rate": 7.39125717562646e-06, "loss": 0.7675, "step": 6783 }, { "epoch": 1.871448275862069, "grad_norm": 3.8707432746887207, "learning_rate": 7.39045593394637e-06, "loss": 0.775, "step": 6784 }, { "epoch": 1.8717241379310345, "grad_norm": 3.9036037921905518, "learning_rate": 7.389654612684241e-06, "loss": 0.7924, "step": 6785 }, { "epoch": 1.8719999999999999, "grad_norm": 4.055956840515137, "learning_rate": 7.3888532118667446e-06, "loss": 0.8242, "step": 6786 }, { "epoch": 1.8722758620689657, "grad_norm": 3.9083430767059326, "learning_rate": 7.3880517315205635e-06, "loss": 0.7685, "step": 6787 }, { "epoch": 1.872551724137931, "grad_norm": 4.023104190826416, "learning_rate": 7.38725017167238e-06, "loss": 0.72, "step": 6788 }, { "epoch": 1.8728275862068966, "grad_norm": 3.932478666305542, "learning_rate": 7.38644853234888e-06, "loss": 0.7979, "step": 6789 }, { "epoch": 1.8731034482758622, "grad_norm": 3.3189804553985596, "learning_rate": 7.385646813576749e-06, "loss": 0.7124, "step": 6790 }, { "epoch": 1.8733793103448275, "grad_norm": 4.450799465179443, "learning_rate": 7.384845015382681e-06, "loss": 0.9805, "step": 6791 }, { "epoch": 1.873655172413793, "grad_norm": 4.058814525604248, "learning_rate": 7.384043137793365e-06, "loss": 0.8823, "step": 6792 }, { "epoch": 1.8739310344827587, "grad_norm": 3.799452066421509, "learning_rate": 7.383241180835501e-06, "loss": 0.8364, "step": 6793 }, { "epoch": 1.874206896551724, "grad_norm": 3.8666276931762695, "learning_rate": 7.382439144535784e-06, "loss": 0.6971, "step": 6794 }, { "epoch": 1.8744827586206898, "grad_norm": 4.0142903327941895, "learning_rate": 7.381637028920919e-06, "loss": 0.8321, "step": 6795 }, { "epoch": 1.8747586206896552, "grad_norm": 3.633409261703491, "learning_rate": 7.380834834017603e-06, "loss": 0.7429, "step": 6796 }, { "epoch": 1.8750344827586207, "grad_norm": 4.240599155426025, "learning_rate": 7.380032559852548e-06, "loss": 0.7744, "step": 6797 }, { "epoch": 1.8753103448275863, "grad_norm": 3.8463375568389893, "learning_rate": 7.379230206452461e-06, "loss": 0.8623, "step": 6798 }, { "epoch": 1.8755862068965516, "grad_norm": 4.208836078643799, "learning_rate": 7.378427773844056e-06, "loss": 0.7883, "step": 6799 }, { "epoch": 1.8758620689655172, "grad_norm": 4.112133979797363, "learning_rate": 7.377625262054043e-06, "loss": 0.8288, "step": 6800 }, { "epoch": 1.8761379310344828, "grad_norm": 3.9507391452789307, "learning_rate": 7.376822671109143e-06, "loss": 0.86, "step": 6801 }, { "epoch": 1.8764137931034481, "grad_norm": 3.9067234992980957, "learning_rate": 7.376020001036072e-06, "loss": 0.7759, "step": 6802 }, { "epoch": 1.876689655172414, "grad_norm": 4.246078014373779, "learning_rate": 7.375217251861555e-06, "loss": 0.903, "step": 6803 }, { "epoch": 1.8769655172413793, "grad_norm": 4.422150611877441, "learning_rate": 7.374414423612314e-06, "loss": 0.8271, "step": 6804 }, { "epoch": 1.8772413793103448, "grad_norm": 3.936400890350342, "learning_rate": 7.3736115163150775e-06, "loss": 0.8095, "step": 6805 }, { "epoch": 1.8775172413793104, "grad_norm": 4.094451427459717, "learning_rate": 7.372808529996579e-06, "loss": 0.824, "step": 6806 }, { "epoch": 1.8777931034482758, "grad_norm": 3.7646098136901855, "learning_rate": 7.372005464683546e-06, "loss": 0.7774, "step": 6807 }, { "epoch": 1.8780689655172413, "grad_norm": 3.7978649139404297, "learning_rate": 7.3712023204027165e-06, "loss": 0.8387, "step": 6808 }, { "epoch": 1.878344827586207, "grad_norm": 4.146035194396973, "learning_rate": 7.370399097180827e-06, "loss": 0.836, "step": 6809 }, { "epoch": 1.8786206896551723, "grad_norm": 4.387266635894775, "learning_rate": 7.369595795044621e-06, "loss": 0.8998, "step": 6810 }, { "epoch": 1.878896551724138, "grad_norm": 3.6996476650238037, "learning_rate": 7.368792414020838e-06, "loss": 0.8729, "step": 6811 }, { "epoch": 1.8791724137931034, "grad_norm": 4.0254034996032715, "learning_rate": 7.3679889541362274e-06, "loss": 0.9451, "step": 6812 }, { "epoch": 1.879448275862069, "grad_norm": 3.8705520629882812, "learning_rate": 7.3671854154175335e-06, "loss": 0.8006, "step": 6813 }, { "epoch": 1.8797241379310345, "grad_norm": 4.036379337310791, "learning_rate": 7.366381797891512e-06, "loss": 0.7122, "step": 6814 }, { "epoch": 1.88, "grad_norm": 4.271423816680908, "learning_rate": 7.365578101584913e-06, "loss": 0.792, "step": 6815 }, { "epoch": 1.8802758620689655, "grad_norm": 4.158880710601807, "learning_rate": 7.364774326524493e-06, "loss": 0.9879, "step": 6816 }, { "epoch": 1.880551724137931, "grad_norm": 3.590198278427124, "learning_rate": 7.3639704727370124e-06, "loss": 0.7638, "step": 6817 }, { "epoch": 1.8808275862068966, "grad_norm": 3.7362797260284424, "learning_rate": 7.363166540249234e-06, "loss": 0.6922, "step": 6818 }, { "epoch": 1.8811034482758622, "grad_norm": 3.7394754886627197, "learning_rate": 7.3623625290879186e-06, "loss": 0.7348, "step": 6819 }, { "epoch": 1.8813793103448275, "grad_norm": 3.7807564735412598, "learning_rate": 7.361558439279836e-06, "loss": 0.895, "step": 6820 }, { "epoch": 1.881655172413793, "grad_norm": 3.7527263164520264, "learning_rate": 7.360754270851753e-06, "loss": 0.6711, "step": 6821 }, { "epoch": 1.8819310344827587, "grad_norm": 4.1090874671936035, "learning_rate": 7.359950023830444e-06, "loss": 0.9146, "step": 6822 }, { "epoch": 1.882206896551724, "grad_norm": 3.897474765777588, "learning_rate": 7.3591456982426825e-06, "loss": 0.767, "step": 6823 }, { "epoch": 1.8824827586206898, "grad_norm": 3.931187391281128, "learning_rate": 7.358341294115245e-06, "loss": 0.7359, "step": 6824 }, { "epoch": 1.8827586206896552, "grad_norm": 3.9952003955841064, "learning_rate": 7.357536811474914e-06, "loss": 0.7422, "step": 6825 }, { "epoch": 1.8830344827586207, "grad_norm": 4.254235744476318, "learning_rate": 7.356732250348469e-06, "loss": 0.8075, "step": 6826 }, { "epoch": 1.8833103448275863, "grad_norm": 3.8033745288848877, "learning_rate": 7.355927610762696e-06, "loss": 0.7383, "step": 6827 }, { "epoch": 1.8835862068965517, "grad_norm": 3.8468751907348633, "learning_rate": 7.355122892744383e-06, "loss": 0.784, "step": 6828 }, { "epoch": 1.8838620689655172, "grad_norm": 4.247132778167725, "learning_rate": 7.354318096320321e-06, "loss": 0.7968, "step": 6829 }, { "epoch": 1.8841379310344828, "grad_norm": 4.169243335723877, "learning_rate": 7.353513221517303e-06, "loss": 0.8837, "step": 6830 }, { "epoch": 1.8844137931034481, "grad_norm": 4.070045471191406, "learning_rate": 7.352708268362123e-06, "loss": 0.8002, "step": 6831 }, { "epoch": 1.884689655172414, "grad_norm": 3.7822506427764893, "learning_rate": 7.351903236881579e-06, "loss": 0.7572, "step": 6832 }, { "epoch": 1.8849655172413793, "grad_norm": 3.8467514514923096, "learning_rate": 7.351098127102475e-06, "loss": 0.7608, "step": 6833 }, { "epoch": 1.8852413793103449, "grad_norm": 4.251412391662598, "learning_rate": 7.350292939051611e-06, "loss": 0.7252, "step": 6834 }, { "epoch": 1.8855172413793104, "grad_norm": 4.235902309417725, "learning_rate": 7.349487672755794e-06, "loss": 0.8468, "step": 6835 }, { "epoch": 1.8857931034482758, "grad_norm": 4.320734977722168, "learning_rate": 7.348682328241833e-06, "loss": 0.8782, "step": 6836 }, { "epoch": 1.8860689655172413, "grad_norm": 4.112489700317383, "learning_rate": 7.3478769055365394e-06, "loss": 0.7429, "step": 6837 }, { "epoch": 1.886344827586207, "grad_norm": 4.288210868835449, "learning_rate": 7.347071404666725e-06, "loss": 0.9712, "step": 6838 }, { "epoch": 1.8866206896551723, "grad_norm": 3.6995902061462402, "learning_rate": 7.346265825659211e-06, "loss": 0.7457, "step": 6839 }, { "epoch": 1.886896551724138, "grad_norm": 4.041952133178711, "learning_rate": 7.345460168540811e-06, "loss": 0.8715, "step": 6840 }, { "epoch": 1.8871724137931034, "grad_norm": 4.038161754608154, "learning_rate": 7.344654433338349e-06, "loss": 0.7895, "step": 6841 }, { "epoch": 1.887448275862069, "grad_norm": 3.5957579612731934, "learning_rate": 7.343848620078648e-06, "loss": 0.7865, "step": 6842 }, { "epoch": 1.8877241379310346, "grad_norm": 4.282541275024414, "learning_rate": 7.3430427287885375e-06, "loss": 0.7982, "step": 6843 }, { "epoch": 1.888, "grad_norm": 4.217337608337402, "learning_rate": 7.342236759494844e-06, "loss": 0.8805, "step": 6844 }, { "epoch": 1.8882758620689655, "grad_norm": 4.420050621032715, "learning_rate": 7.3414307122244e-06, "loss": 0.7634, "step": 6845 }, { "epoch": 1.888551724137931, "grad_norm": 4.044604778289795, "learning_rate": 7.340624587004041e-06, "loss": 0.7982, "step": 6846 }, { "epoch": 1.8888275862068964, "grad_norm": 4.087890148162842, "learning_rate": 7.339818383860604e-06, "loss": 0.8041, "step": 6847 }, { "epoch": 1.8891034482758622, "grad_norm": 3.6885125637054443, "learning_rate": 7.3390121028209295e-06, "loss": 0.853, "step": 6848 }, { "epoch": 1.8893793103448275, "grad_norm": 3.885056495666504, "learning_rate": 7.338205743911858e-06, "loss": 0.7587, "step": 6849 }, { "epoch": 1.889655172413793, "grad_norm": 3.936742067337036, "learning_rate": 7.337399307160235e-06, "loss": 0.7733, "step": 6850 }, { "epoch": 1.8899310344827587, "grad_norm": 4.095416069030762, "learning_rate": 7.336592792592911e-06, "loss": 0.7848, "step": 6851 }, { "epoch": 1.890206896551724, "grad_norm": 3.932055950164795, "learning_rate": 7.3357862002367305e-06, "loss": 0.7073, "step": 6852 }, { "epoch": 1.8904827586206898, "grad_norm": 3.727544069290161, "learning_rate": 7.33497953011855e-06, "loss": 0.6905, "step": 6853 }, { "epoch": 1.8907586206896552, "grad_norm": 4.176732063293457, "learning_rate": 7.334172782265225e-06, "loss": 0.7717, "step": 6854 }, { "epoch": 1.8910344827586207, "grad_norm": 3.7823238372802734, "learning_rate": 7.333365956703613e-06, "loss": 0.7862, "step": 6855 }, { "epoch": 1.8913103448275863, "grad_norm": 3.76694655418396, "learning_rate": 7.332559053460573e-06, "loss": 0.8039, "step": 6856 }, { "epoch": 1.8915862068965517, "grad_norm": 4.4023332595825195, "learning_rate": 7.33175207256297e-06, "loss": 0.8317, "step": 6857 }, { "epoch": 1.8918620689655172, "grad_norm": 4.27048397064209, "learning_rate": 7.330945014037667e-06, "loss": 0.7836, "step": 6858 }, { "epoch": 1.8921379310344828, "grad_norm": 4.047051429748535, "learning_rate": 7.330137877911536e-06, "loss": 0.9425, "step": 6859 }, { "epoch": 1.8924137931034481, "grad_norm": 3.914581060409546, "learning_rate": 7.329330664211446e-06, "loss": 0.7833, "step": 6860 }, { "epoch": 1.892689655172414, "grad_norm": 3.6867778301239014, "learning_rate": 7.328523372964269e-06, "loss": 0.7969, "step": 6861 }, { "epoch": 1.8929655172413793, "grad_norm": 4.56722354888916, "learning_rate": 7.327716004196883e-06, "loss": 0.8443, "step": 6862 }, { "epoch": 1.8932413793103449, "grad_norm": 3.935129165649414, "learning_rate": 7.326908557936165e-06, "loss": 0.7982, "step": 6863 }, { "epoch": 1.8935172413793104, "grad_norm": 4.259500980377197, "learning_rate": 7.326101034208999e-06, "loss": 0.9969, "step": 6864 }, { "epoch": 1.8937931034482758, "grad_norm": 4.013774394989014, "learning_rate": 7.325293433042265e-06, "loss": 0.7921, "step": 6865 }, { "epoch": 1.8940689655172414, "grad_norm": 4.237043857574463, "learning_rate": 7.3244857544628515e-06, "loss": 0.7443, "step": 6866 }, { "epoch": 1.894344827586207, "grad_norm": 3.7281200885772705, "learning_rate": 7.323677998497647e-06, "loss": 0.8383, "step": 6867 }, { "epoch": 1.8946206896551723, "grad_norm": 3.7447714805603027, "learning_rate": 7.322870165173543e-06, "loss": 0.7779, "step": 6868 }, { "epoch": 1.894896551724138, "grad_norm": 3.7811062335968018, "learning_rate": 7.322062254517435e-06, "loss": 0.8076, "step": 6869 }, { "epoch": 1.8951724137931034, "grad_norm": 3.7327394485473633, "learning_rate": 7.321254266556217e-06, "loss": 0.7985, "step": 6870 }, { "epoch": 1.895448275862069, "grad_norm": 3.7982864379882812, "learning_rate": 7.320446201316789e-06, "loss": 0.7762, "step": 6871 }, { "epoch": 1.8957241379310346, "grad_norm": 3.5761754512786865, "learning_rate": 7.319638058826052e-06, "loss": 0.781, "step": 6872 }, { "epoch": 1.896, "grad_norm": 4.110396862030029, "learning_rate": 7.318829839110913e-06, "loss": 0.9409, "step": 6873 }, { "epoch": 1.8962758620689655, "grad_norm": 3.92946720123291, "learning_rate": 7.318021542198277e-06, "loss": 0.7904, "step": 6874 }, { "epoch": 1.896551724137931, "grad_norm": 4.3307366371154785, "learning_rate": 7.317213168115053e-06, "loss": 1.0195, "step": 6875 }, { "epoch": 1.8968275862068964, "grad_norm": 4.304469108581543, "learning_rate": 7.316404716888154e-06, "loss": 0.8898, "step": 6876 }, { "epoch": 1.8971034482758622, "grad_norm": 4.280037879943848, "learning_rate": 7.315596188544496e-06, "loss": 0.8215, "step": 6877 }, { "epoch": 1.8973793103448275, "grad_norm": 4.185645580291748, "learning_rate": 7.314787583110994e-06, "loss": 0.8628, "step": 6878 }, { "epoch": 1.8976551724137931, "grad_norm": 3.5104057788848877, "learning_rate": 7.313978900614566e-06, "loss": 0.7711, "step": 6879 }, { "epoch": 1.8979310344827587, "grad_norm": 3.9751927852630615, "learning_rate": 7.31317014108214e-06, "loss": 0.7596, "step": 6880 }, { "epoch": 1.898206896551724, "grad_norm": 3.826489210128784, "learning_rate": 7.3123613045406365e-06, "loss": 0.7515, "step": 6881 }, { "epoch": 1.8984827586206896, "grad_norm": 4.020702838897705, "learning_rate": 7.311552391016982e-06, "loss": 0.824, "step": 6882 }, { "epoch": 1.8987586206896552, "grad_norm": 3.9959604740142822, "learning_rate": 7.310743400538112e-06, "loss": 0.7694, "step": 6883 }, { "epoch": 1.8990344827586205, "grad_norm": 3.8524887561798096, "learning_rate": 7.309934333130954e-06, "loss": 0.7702, "step": 6884 }, { "epoch": 1.8993103448275863, "grad_norm": 3.9789767265319824, "learning_rate": 7.309125188822443e-06, "loss": 0.783, "step": 6885 }, { "epoch": 1.8995862068965517, "grad_norm": 3.396406888961792, "learning_rate": 7.308315967639521e-06, "loss": 0.6749, "step": 6886 }, { "epoch": 1.8998620689655172, "grad_norm": 3.7310149669647217, "learning_rate": 7.3075066696091245e-06, "loss": 0.7428, "step": 6887 }, { "epoch": 1.9001379310344828, "grad_norm": 3.735319137573242, "learning_rate": 7.306697294758197e-06, "loss": 0.7291, "step": 6888 }, { "epoch": 1.9004137931034482, "grad_norm": 3.9026176929473877, "learning_rate": 7.305887843113686e-06, "loss": 0.8319, "step": 6889 }, { "epoch": 1.900689655172414, "grad_norm": 3.9592690467834473, "learning_rate": 7.305078314702536e-06, "loss": 0.8984, "step": 6890 }, { "epoch": 1.9009655172413793, "grad_norm": 3.9493861198425293, "learning_rate": 7.304268709551699e-06, "loss": 0.7591, "step": 6891 }, { "epoch": 1.9012413793103449, "grad_norm": 3.905012369155884, "learning_rate": 7.303459027688129e-06, "loss": 0.8156, "step": 6892 }, { "epoch": 1.9015172413793104, "grad_norm": 4.204957962036133, "learning_rate": 7.302649269138782e-06, "loss": 0.9068, "step": 6893 }, { "epoch": 1.9017931034482758, "grad_norm": 3.741628885269165, "learning_rate": 7.301839433930613e-06, "loss": 0.807, "step": 6894 }, { "epoch": 1.9020689655172414, "grad_norm": 3.577441453933716, "learning_rate": 7.301029522090586e-06, "loss": 0.7523, "step": 6895 }, { "epoch": 1.902344827586207, "grad_norm": 3.9123222827911377, "learning_rate": 7.3002195336456615e-06, "loss": 0.8699, "step": 6896 }, { "epoch": 1.9026206896551723, "grad_norm": 3.7574808597564697, "learning_rate": 7.299409468622809e-06, "loss": 0.8985, "step": 6897 }, { "epoch": 1.902896551724138, "grad_norm": 4.30738639831543, "learning_rate": 7.298599327048992e-06, "loss": 1.0053, "step": 6898 }, { "epoch": 1.9031724137931034, "grad_norm": 3.5889625549316406, "learning_rate": 7.2977891089511855e-06, "loss": 0.8244, "step": 6899 }, { "epoch": 1.903448275862069, "grad_norm": 4.052715301513672, "learning_rate": 7.296978814356361e-06, "loss": 0.83, "step": 6900 }, { "epoch": 1.9037241379310346, "grad_norm": 3.506293296813965, "learning_rate": 7.296168443291495e-06, "loss": 0.7229, "step": 6901 }, { "epoch": 1.904, "grad_norm": 3.69697642326355, "learning_rate": 7.2953579957835655e-06, "loss": 0.7052, "step": 6902 }, { "epoch": 1.9042758620689655, "grad_norm": 3.791950225830078, "learning_rate": 7.294547471859555e-06, "loss": 0.7716, "step": 6903 }, { "epoch": 1.904551724137931, "grad_norm": 3.8704934120178223, "learning_rate": 7.293736871546444e-06, "loss": 0.8134, "step": 6904 }, { "epoch": 1.9048275862068964, "grad_norm": 4.148856163024902, "learning_rate": 7.292926194871222e-06, "loss": 0.8379, "step": 6905 }, { "epoch": 1.9051034482758622, "grad_norm": 3.7002804279327393, "learning_rate": 7.292115441860876e-06, "loss": 0.8018, "step": 6906 }, { "epoch": 1.9053793103448275, "grad_norm": 3.9635744094848633, "learning_rate": 7.291304612542398e-06, "loss": 0.7776, "step": 6907 }, { "epoch": 1.9056551724137931, "grad_norm": 4.352365493774414, "learning_rate": 7.2904937069427804e-06, "loss": 0.8933, "step": 6908 }, { "epoch": 1.9059310344827587, "grad_norm": 4.440613269805908, "learning_rate": 7.289682725089022e-06, "loss": 0.8961, "step": 6909 }, { "epoch": 1.906206896551724, "grad_norm": 3.9853932857513428, "learning_rate": 7.288871667008119e-06, "loss": 0.7941, "step": 6910 }, { "epoch": 1.9064827586206896, "grad_norm": 3.922588348388672, "learning_rate": 7.288060532727075e-06, "loss": 0.8379, "step": 6911 }, { "epoch": 1.9067586206896552, "grad_norm": 3.720837354660034, "learning_rate": 7.287249322272893e-06, "loss": 0.7543, "step": 6912 }, { "epoch": 1.9070344827586205, "grad_norm": 3.768592596054077, "learning_rate": 7.286438035672579e-06, "loss": 0.7864, "step": 6913 }, { "epoch": 1.9073103448275863, "grad_norm": 4.101725101470947, "learning_rate": 7.285626672953144e-06, "loss": 0.927, "step": 6914 }, { "epoch": 1.9075862068965517, "grad_norm": 4.308157920837402, "learning_rate": 7.284815234141595e-06, "loss": 0.8638, "step": 6915 }, { "epoch": 1.9078620689655172, "grad_norm": 4.182886600494385, "learning_rate": 7.284003719264952e-06, "loss": 0.7852, "step": 6916 }, { "epoch": 1.9081379310344828, "grad_norm": 4.0583672523498535, "learning_rate": 7.283192128350226e-06, "loss": 0.9129, "step": 6917 }, { "epoch": 1.9084137931034482, "grad_norm": 4.082306861877441, "learning_rate": 7.282380461424441e-06, "loss": 0.8112, "step": 6918 }, { "epoch": 1.908689655172414, "grad_norm": 3.728001356124878, "learning_rate": 7.281568718514616e-06, "loss": 0.8161, "step": 6919 }, { "epoch": 1.9089655172413793, "grad_norm": 3.4142649173736572, "learning_rate": 7.280756899647774e-06, "loss": 0.7872, "step": 6920 }, { "epoch": 1.9092413793103449, "grad_norm": 3.65854811668396, "learning_rate": 7.279945004850944e-06, "loss": 0.8483, "step": 6921 }, { "epoch": 1.9095172413793104, "grad_norm": 3.822927236557007, "learning_rate": 7.279133034151157e-06, "loss": 0.7902, "step": 6922 }, { "epoch": 1.9097931034482758, "grad_norm": 3.8877310752868652, "learning_rate": 7.27832098757544e-06, "loss": 0.7622, "step": 6923 }, { "epoch": 1.9100689655172414, "grad_norm": 3.790039300918579, "learning_rate": 7.277508865150832e-06, "loss": 0.712, "step": 6924 }, { "epoch": 1.910344827586207, "grad_norm": 3.733238935470581, "learning_rate": 7.276696666904366e-06, "loss": 0.8309, "step": 6925 }, { "epoch": 1.9106206896551723, "grad_norm": 3.9034194946289062, "learning_rate": 7.275884392863084e-06, "loss": 0.9534, "step": 6926 }, { "epoch": 1.910896551724138, "grad_norm": 3.9185049533843994, "learning_rate": 7.275072043054026e-06, "loss": 0.8255, "step": 6927 }, { "epoch": 1.9111724137931034, "grad_norm": 3.524728536605835, "learning_rate": 7.2742596175042365e-06, "loss": 0.7002, "step": 6928 }, { "epoch": 1.911448275862069, "grad_norm": 3.7942137718200684, "learning_rate": 7.273447116240764e-06, "loss": 0.72, "step": 6929 }, { "epoch": 1.9117241379310346, "grad_norm": 3.8314826488494873, "learning_rate": 7.272634539290657e-06, "loss": 0.7284, "step": 6930 }, { "epoch": 1.912, "grad_norm": 4.008951187133789, "learning_rate": 7.2718218866809674e-06, "loss": 0.8264, "step": 6931 }, { "epoch": 1.9122758620689655, "grad_norm": 4.127923965454102, "learning_rate": 7.271009158438751e-06, "loss": 0.7948, "step": 6932 }, { "epoch": 1.912551724137931, "grad_norm": 3.713554859161377, "learning_rate": 7.2701963545910625e-06, "loss": 0.7671, "step": 6933 }, { "epoch": 1.9128275862068964, "grad_norm": 4.052338600158691, "learning_rate": 7.269383475164964e-06, "loss": 0.7757, "step": 6934 }, { "epoch": 1.9131034482758622, "grad_norm": 3.95467472076416, "learning_rate": 7.268570520187514e-06, "loss": 0.8939, "step": 6935 }, { "epoch": 1.9133793103448276, "grad_norm": 4.254513263702393, "learning_rate": 7.267757489685782e-06, "loss": 0.8362, "step": 6936 }, { "epoch": 1.9136551724137931, "grad_norm": 3.8589611053466797, "learning_rate": 7.266944383686829e-06, "loss": 0.7716, "step": 6937 }, { "epoch": 1.9139310344827587, "grad_norm": 4.142784118652344, "learning_rate": 7.26613120221773e-06, "loss": 0.7756, "step": 6938 }, { "epoch": 1.914206896551724, "grad_norm": 3.9016168117523193, "learning_rate": 7.2653179453055535e-06, "loss": 0.8669, "step": 6939 }, { "epoch": 1.9144827586206896, "grad_norm": 3.767364978790283, "learning_rate": 7.264504612977377e-06, "loss": 0.7969, "step": 6940 }, { "epoch": 1.9147586206896552, "grad_norm": 4.181081295013428, "learning_rate": 7.263691205260275e-06, "loss": 0.9296, "step": 6941 }, { "epoch": 1.9150344827586205, "grad_norm": 3.701610565185547, "learning_rate": 7.2628777221813276e-06, "loss": 0.7675, "step": 6942 }, { "epoch": 1.9153103448275863, "grad_norm": 3.7322442531585693, "learning_rate": 7.262064163767619e-06, "loss": 0.7236, "step": 6943 }, { "epoch": 1.9155862068965517, "grad_norm": 3.7899861335754395, "learning_rate": 7.261250530046232e-06, "loss": 0.7452, "step": 6944 }, { "epoch": 1.9158620689655173, "grad_norm": 3.917403221130371, "learning_rate": 7.260436821044255e-06, "loss": 0.8814, "step": 6945 }, { "epoch": 1.9161379310344828, "grad_norm": 3.964028835296631, "learning_rate": 7.259623036788777e-06, "loss": 0.8833, "step": 6946 }, { "epoch": 1.9164137931034482, "grad_norm": 4.00400972366333, "learning_rate": 7.258809177306888e-06, "loss": 0.8887, "step": 6947 }, { "epoch": 1.9166896551724137, "grad_norm": 4.516798973083496, "learning_rate": 7.257995242625685e-06, "loss": 0.7487, "step": 6948 }, { "epoch": 1.9169655172413793, "grad_norm": 3.3083877563476562, "learning_rate": 7.257181232772266e-06, "loss": 0.7438, "step": 6949 }, { "epoch": 1.9172413793103447, "grad_norm": 4.234508514404297, "learning_rate": 7.256367147773728e-06, "loss": 0.9112, "step": 6950 }, { "epoch": 1.9175172413793105, "grad_norm": 3.8975117206573486, "learning_rate": 7.2555529876571776e-06, "loss": 0.6951, "step": 6951 }, { "epoch": 1.9177931034482758, "grad_norm": 3.9048821926116943, "learning_rate": 7.254738752449714e-06, "loss": 0.7397, "step": 6952 }, { "epoch": 1.9180689655172414, "grad_norm": 3.6988980770111084, "learning_rate": 7.253924442178448e-06, "loss": 0.6812, "step": 6953 }, { "epoch": 1.918344827586207, "grad_norm": 3.9442243576049805, "learning_rate": 7.253110056870487e-06, "loss": 0.7383, "step": 6954 }, { "epoch": 1.9186206896551723, "grad_norm": 3.8372678756713867, "learning_rate": 7.252295596552946e-06, "loss": 0.795, "step": 6955 }, { "epoch": 1.918896551724138, "grad_norm": 4.26871919631958, "learning_rate": 7.2514810612529365e-06, "loss": 1.0191, "step": 6956 }, { "epoch": 1.9191724137931034, "grad_norm": 4.188805103302002, "learning_rate": 7.250666450997578e-06, "loss": 0.8708, "step": 6957 }, { "epoch": 1.919448275862069, "grad_norm": 3.618335723876953, "learning_rate": 7.249851765813989e-06, "loss": 0.7709, "step": 6958 }, { "epoch": 1.9197241379310346, "grad_norm": 4.524926662445068, "learning_rate": 7.249037005729293e-06, "loss": 0.8183, "step": 6959 }, { "epoch": 1.92, "grad_norm": 4.11630916595459, "learning_rate": 7.248222170770611e-06, "loss": 0.7641, "step": 6960 }, { "epoch": 1.9202758620689655, "grad_norm": 3.4713635444641113, "learning_rate": 7.247407260965076e-06, "loss": 0.7026, "step": 6961 }, { "epoch": 1.920551724137931, "grad_norm": 3.6072933673858643, "learning_rate": 7.246592276339812e-06, "loss": 0.8211, "step": 6962 }, { "epoch": 1.9208275862068964, "grad_norm": 3.364515542984009, "learning_rate": 7.245777216921955e-06, "loss": 0.6799, "step": 6963 }, { "epoch": 1.9211034482758622, "grad_norm": 3.6139886379241943, "learning_rate": 7.244962082738638e-06, "loss": 0.7448, "step": 6964 }, { "epoch": 1.9213793103448276, "grad_norm": 3.6847591400146484, "learning_rate": 7.244146873816997e-06, "loss": 0.8383, "step": 6965 }, { "epoch": 1.9216551724137931, "grad_norm": 4.216929912567139, "learning_rate": 7.243331590184174e-06, "loss": 0.7808, "step": 6966 }, { "epoch": 1.9219310344827587, "grad_norm": 4.018702030181885, "learning_rate": 7.242516231867309e-06, "loss": 0.7355, "step": 6967 }, { "epoch": 1.922206896551724, "grad_norm": 4.418396949768066, "learning_rate": 7.241700798893548e-06, "loss": 0.9315, "step": 6968 }, { "epoch": 1.9224827586206896, "grad_norm": 3.9186418056488037, "learning_rate": 7.240885291290038e-06, "loss": 0.7876, "step": 6969 }, { "epoch": 1.9227586206896552, "grad_norm": 4.072790145874023, "learning_rate": 7.240069709083928e-06, "loss": 0.8072, "step": 6970 }, { "epoch": 1.9230344827586205, "grad_norm": 4.021246910095215, "learning_rate": 7.239254052302369e-06, "loss": 0.9112, "step": 6971 }, { "epoch": 1.9233103448275863, "grad_norm": 3.866960287094116, "learning_rate": 7.2384383209725184e-06, "loss": 0.974, "step": 6972 }, { "epoch": 1.9235862068965517, "grad_norm": 3.6973447799682617, "learning_rate": 7.237622515121529e-06, "loss": 0.8176, "step": 6973 }, { "epoch": 1.9238620689655173, "grad_norm": 3.8912951946258545, "learning_rate": 7.236806634776564e-06, "loss": 0.6839, "step": 6974 }, { "epoch": 1.9241379310344828, "grad_norm": 3.853271245956421, "learning_rate": 7.235990679964784e-06, "loss": 0.7191, "step": 6975 }, { "epoch": 1.9244137931034482, "grad_norm": 3.8657422065734863, "learning_rate": 7.2351746507133524e-06, "loss": 0.8136, "step": 6976 }, { "epoch": 1.9246896551724137, "grad_norm": 3.720661163330078, "learning_rate": 7.234358547049437e-06, "loss": 0.7562, "step": 6977 }, { "epoch": 1.9249655172413793, "grad_norm": 4.224544525146484, "learning_rate": 7.233542369000209e-06, "loss": 0.9254, "step": 6978 }, { "epoch": 1.9252413793103447, "grad_norm": 3.8877549171447754, "learning_rate": 7.232726116592836e-06, "loss": 0.7335, "step": 6979 }, { "epoch": 1.9255172413793105, "grad_norm": 3.759173631668091, "learning_rate": 7.231909789854498e-06, "loss": 0.8102, "step": 6980 }, { "epoch": 1.9257931034482758, "grad_norm": 3.679044246673584, "learning_rate": 7.231093388812367e-06, "loss": 0.7761, "step": 6981 }, { "epoch": 1.9260689655172414, "grad_norm": 4.19349479675293, "learning_rate": 7.230276913493623e-06, "loss": 0.7836, "step": 6982 }, { "epoch": 1.926344827586207, "grad_norm": 3.716993570327759, "learning_rate": 7.22946036392545e-06, "loss": 0.7135, "step": 6983 }, { "epoch": 1.9266206896551723, "grad_norm": 4.148251533508301, "learning_rate": 7.228643740135029e-06, "loss": 0.8023, "step": 6984 }, { "epoch": 1.926896551724138, "grad_norm": 3.5078938007354736, "learning_rate": 7.22782704214955e-06, "loss": 0.739, "step": 6985 }, { "epoch": 1.9271724137931034, "grad_norm": 4.0953688621521, "learning_rate": 7.227010269996202e-06, "loss": 0.8161, "step": 6986 }, { "epoch": 1.927448275862069, "grad_norm": 4.258573055267334, "learning_rate": 7.2261934237021724e-06, "loss": 0.8619, "step": 6987 }, { "epoch": 1.9277241379310346, "grad_norm": 4.599178314208984, "learning_rate": 7.225376503294661e-06, "loss": 0.8555, "step": 6988 }, { "epoch": 1.928, "grad_norm": 3.8500006198883057, "learning_rate": 7.22455950880086e-06, "loss": 0.7197, "step": 6989 }, { "epoch": 1.9282758620689655, "grad_norm": 4.171547889709473, "learning_rate": 7.223742440247972e-06, "loss": 0.9126, "step": 6990 }, { "epoch": 1.928551724137931, "grad_norm": 3.894791603088379, "learning_rate": 7.2229252976631934e-06, "loss": 0.7374, "step": 6991 }, { "epoch": 1.9288275862068964, "grad_norm": 4.319631576538086, "learning_rate": 7.222108081073733e-06, "loss": 0.7291, "step": 6992 }, { "epoch": 1.9291034482758622, "grad_norm": 3.8590781688690186, "learning_rate": 7.221290790506796e-06, "loss": 0.6991, "step": 6993 }, { "epoch": 1.9293793103448276, "grad_norm": 4.02409029006958, "learning_rate": 7.220473425989589e-06, "loss": 0.7763, "step": 6994 }, { "epoch": 1.9296551724137931, "grad_norm": 3.40613055229187, "learning_rate": 7.2196559875493265e-06, "loss": 0.7267, "step": 6995 }, { "epoch": 1.9299310344827587, "grad_norm": 3.831341028213501, "learning_rate": 7.218838475213221e-06, "loss": 0.702, "step": 6996 }, { "epoch": 1.930206896551724, "grad_norm": 4.432506084442139, "learning_rate": 7.218020889008489e-06, "loss": 0.8239, "step": 6997 }, { "epoch": 1.9304827586206896, "grad_norm": 4.091574668884277, "learning_rate": 7.217203228962348e-06, "loss": 0.9278, "step": 6998 }, { "epoch": 1.9307586206896552, "grad_norm": 3.332481622695923, "learning_rate": 7.216385495102022e-06, "loss": 0.7342, "step": 6999 }, { "epoch": 1.9310344827586206, "grad_norm": 3.8439903259277344, "learning_rate": 7.215567687454732e-06, "loss": 0.8151, "step": 7000 }, { "epoch": 1.9310344827586206, "eval_loss": 1.2560398578643799, "eval_runtime": 13.659, "eval_samples_per_second": 29.285, "eval_steps_per_second": 3.661, "step": 7000 }, { "epoch": 1.9313103448275863, "grad_norm": 4.503711223602295, "learning_rate": 7.214749806047705e-06, "loss": 0.8945, "step": 7001 }, { "epoch": 1.9315862068965517, "grad_norm": 3.529581069946289, "learning_rate": 7.213931850908169e-06, "loss": 0.701, "step": 7002 }, { "epoch": 1.9318620689655173, "grad_norm": 3.890681743621826, "learning_rate": 7.213113822063355e-06, "loss": 0.7653, "step": 7003 }, { "epoch": 1.9321379310344828, "grad_norm": 3.513721227645874, "learning_rate": 7.2122957195404974e-06, "loss": 0.7724, "step": 7004 }, { "epoch": 1.9324137931034482, "grad_norm": 4.249143123626709, "learning_rate": 7.211477543366833e-06, "loss": 0.888, "step": 7005 }, { "epoch": 1.9326896551724138, "grad_norm": 3.8873026371002197, "learning_rate": 7.210659293569597e-06, "loss": 0.8357, "step": 7006 }, { "epoch": 1.9329655172413793, "grad_norm": 3.531057596206665, "learning_rate": 7.209840970176034e-06, "loss": 0.7305, "step": 7007 }, { "epoch": 1.9332413793103447, "grad_norm": 3.6155478954315186, "learning_rate": 7.209022573213383e-06, "loss": 0.8569, "step": 7008 }, { "epoch": 1.9335172413793105, "grad_norm": 4.328135967254639, "learning_rate": 7.208204102708894e-06, "loss": 0.7834, "step": 7009 }, { "epoch": 1.9337931034482758, "grad_norm": 3.844768762588501, "learning_rate": 7.207385558689812e-06, "loss": 0.7399, "step": 7010 }, { "epoch": 1.9340689655172414, "grad_norm": 3.9164137840270996, "learning_rate": 7.206566941183392e-06, "loss": 0.7778, "step": 7011 }, { "epoch": 1.934344827586207, "grad_norm": 4.198108196258545, "learning_rate": 7.205748250216881e-06, "loss": 0.8614, "step": 7012 }, { "epoch": 1.9346206896551723, "grad_norm": 3.6475436687469482, "learning_rate": 7.204929485817537e-06, "loss": 0.7202, "step": 7013 }, { "epoch": 1.9348965517241379, "grad_norm": 3.9749279022216797, "learning_rate": 7.204110648012619e-06, "loss": 0.8019, "step": 7014 }, { "epoch": 1.9351724137931035, "grad_norm": 4.010497570037842, "learning_rate": 7.203291736829387e-06, "loss": 0.7835, "step": 7015 }, { "epoch": 1.9354482758620688, "grad_norm": 3.872936725616455, "learning_rate": 7.202472752295104e-06, "loss": 0.9513, "step": 7016 }, { "epoch": 1.9357241379310346, "grad_norm": 3.8525264263153076, "learning_rate": 7.201653694437036e-06, "loss": 0.7902, "step": 7017 }, { "epoch": 1.936, "grad_norm": 3.928518295288086, "learning_rate": 7.200834563282448e-06, "loss": 0.776, "step": 7018 }, { "epoch": 1.9362758620689655, "grad_norm": 3.599921703338623, "learning_rate": 7.200015358858614e-06, "loss": 0.7619, "step": 7019 }, { "epoch": 1.936551724137931, "grad_norm": 3.6812851428985596, "learning_rate": 7.199196081192802e-06, "loss": 0.7469, "step": 7020 }, { "epoch": 1.9368275862068964, "grad_norm": 4.056465148925781, "learning_rate": 7.1983767303122905e-06, "loss": 0.7945, "step": 7021 }, { "epoch": 1.9371034482758622, "grad_norm": 3.81303334236145, "learning_rate": 7.197557306244355e-06, "loss": 0.7417, "step": 7022 }, { "epoch": 1.9373793103448276, "grad_norm": 3.780668258666992, "learning_rate": 7.196737809016279e-06, "loss": 0.7364, "step": 7023 }, { "epoch": 1.9376551724137931, "grad_norm": 4.385805130004883, "learning_rate": 7.1959182386553395e-06, "loss": 0.9235, "step": 7024 }, { "epoch": 1.9379310344827587, "grad_norm": 3.9007744789123535, "learning_rate": 7.195098595188824e-06, "loss": 0.7666, "step": 7025 }, { "epoch": 1.938206896551724, "grad_norm": 4.138296127319336, "learning_rate": 7.194278878644022e-06, "loss": 0.8566, "step": 7026 }, { "epoch": 1.9384827586206896, "grad_norm": 3.979491710662842, "learning_rate": 7.193459089048219e-06, "loss": 0.7288, "step": 7027 }, { "epoch": 1.9387586206896552, "grad_norm": 3.7100706100463867, "learning_rate": 7.192639226428711e-06, "loss": 0.738, "step": 7028 }, { "epoch": 1.9390344827586206, "grad_norm": 3.6177353858947754, "learning_rate": 7.191819290812788e-06, "loss": 0.7249, "step": 7029 }, { "epoch": 1.9393103448275864, "grad_norm": 4.144781112670898, "learning_rate": 7.190999282227752e-06, "loss": 0.8723, "step": 7030 }, { "epoch": 1.9395862068965517, "grad_norm": 4.060194492340088, "learning_rate": 7.190179200700898e-06, "loss": 0.8167, "step": 7031 }, { "epoch": 1.9398620689655173, "grad_norm": 3.6607906818389893, "learning_rate": 7.18935904625953e-06, "loss": 0.8091, "step": 7032 }, { "epoch": 1.9401379310344828, "grad_norm": 3.922072172164917, "learning_rate": 7.188538818930951e-06, "loss": 0.8045, "step": 7033 }, { "epoch": 1.9404137931034482, "grad_norm": 3.551044225692749, "learning_rate": 7.18771851874247e-06, "loss": 0.6852, "step": 7034 }, { "epoch": 1.9406896551724138, "grad_norm": 4.0561723709106445, "learning_rate": 7.186898145721395e-06, "loss": 0.8133, "step": 7035 }, { "epoch": 1.9409655172413793, "grad_norm": 4.1805291175842285, "learning_rate": 7.186077699895036e-06, "loss": 0.8339, "step": 7036 }, { "epoch": 1.9412413793103447, "grad_norm": 4.220009803771973, "learning_rate": 7.185257181290708e-06, "loss": 0.9848, "step": 7037 }, { "epoch": 1.9415172413793105, "grad_norm": 3.977327346801758, "learning_rate": 7.184436589935729e-06, "loss": 0.8046, "step": 7038 }, { "epoch": 1.9417931034482758, "grad_norm": 4.045729637145996, "learning_rate": 7.183615925857414e-06, "loss": 0.9455, "step": 7039 }, { "epoch": 1.9420689655172414, "grad_norm": 3.9985971450805664, "learning_rate": 7.1827951890830875e-06, "loss": 0.8445, "step": 7040 }, { "epoch": 1.942344827586207, "grad_norm": 3.8217575550079346, "learning_rate": 7.181974379640072e-06, "loss": 0.7823, "step": 7041 }, { "epoch": 1.9426206896551723, "grad_norm": 4.086353778839111, "learning_rate": 7.181153497555694e-06, "loss": 0.8232, "step": 7042 }, { "epoch": 1.9428965517241379, "grad_norm": 3.889805316925049, "learning_rate": 7.18033254285728e-06, "loss": 0.9219, "step": 7043 }, { "epoch": 1.9431724137931035, "grad_norm": 3.5230278968811035, "learning_rate": 7.179511515572165e-06, "loss": 0.7093, "step": 7044 }, { "epoch": 1.9434482758620688, "grad_norm": 3.710527181625366, "learning_rate": 7.178690415727677e-06, "loss": 0.7262, "step": 7045 }, { "epoch": 1.9437241379310346, "grad_norm": 4.197324275970459, "learning_rate": 7.177869243351157e-06, "loss": 0.8128, "step": 7046 }, { "epoch": 1.944, "grad_norm": 3.763737916946411, "learning_rate": 7.177047998469938e-06, "loss": 0.6767, "step": 7047 }, { "epoch": 1.9442758620689655, "grad_norm": 3.7886698246002197, "learning_rate": 7.176226681111364e-06, "loss": 0.8008, "step": 7048 }, { "epoch": 1.944551724137931, "grad_norm": 3.918391227722168, "learning_rate": 7.175405291302777e-06, "loss": 0.7743, "step": 7049 }, { "epoch": 1.9448275862068964, "grad_norm": 3.8797097206115723, "learning_rate": 7.174583829071522e-06, "loss": 0.8531, "step": 7050 }, { "epoch": 1.9451034482758622, "grad_norm": 3.5906312465667725, "learning_rate": 7.173762294444948e-06, "loss": 0.7512, "step": 7051 }, { "epoch": 1.9453793103448276, "grad_norm": 4.124885559082031, "learning_rate": 7.172940687450404e-06, "loss": 0.8229, "step": 7052 }, { "epoch": 1.9456551724137932, "grad_norm": 3.8212947845458984, "learning_rate": 7.172119008115243e-06, "loss": 0.7897, "step": 7053 }, { "epoch": 1.9459310344827587, "grad_norm": 4.022776126861572, "learning_rate": 7.171297256466818e-06, "loss": 0.8627, "step": 7054 }, { "epoch": 1.946206896551724, "grad_norm": 3.8768513202667236, "learning_rate": 7.170475432532491e-06, "loss": 0.7145, "step": 7055 }, { "epoch": 1.9464827586206896, "grad_norm": 3.589170455932617, "learning_rate": 7.169653536339618e-06, "loss": 0.7556, "step": 7056 }, { "epoch": 1.9467586206896552, "grad_norm": 3.860215425491333, "learning_rate": 7.168831567915563e-06, "loss": 0.7854, "step": 7057 }, { "epoch": 1.9470344827586206, "grad_norm": 3.7943766117095947, "learning_rate": 7.168009527287689e-06, "loss": 0.7912, "step": 7058 }, { "epoch": 1.9473103448275864, "grad_norm": 3.879875421524048, "learning_rate": 7.167187414483363e-06, "loss": 0.804, "step": 7059 }, { "epoch": 1.9475862068965517, "grad_norm": 4.012897968292236, "learning_rate": 7.166365229529957e-06, "loss": 0.8058, "step": 7060 }, { "epoch": 1.9478620689655173, "grad_norm": 3.6004858016967773, "learning_rate": 7.165542972454842e-06, "loss": 0.6838, "step": 7061 }, { "epoch": 1.9481379310344829, "grad_norm": 4.2753987312316895, "learning_rate": 7.16472064328539e-06, "loss": 0.9687, "step": 7062 }, { "epoch": 1.9484137931034482, "grad_norm": 3.9690723419189453, "learning_rate": 7.16389824204898e-06, "loss": 0.8732, "step": 7063 }, { "epoch": 1.9486896551724138, "grad_norm": 4.030487060546875, "learning_rate": 7.163075768772989e-06, "loss": 0.7644, "step": 7064 }, { "epoch": 1.9489655172413793, "grad_norm": 3.9118826389312744, "learning_rate": 7.1622532234848004e-06, "loss": 0.7914, "step": 7065 }, { "epoch": 1.9492413793103447, "grad_norm": 3.468839406967163, "learning_rate": 7.161430606211796e-06, "loss": 0.731, "step": 7066 }, { "epoch": 1.9495172413793105, "grad_norm": 3.882866859436035, "learning_rate": 7.160607916981366e-06, "loss": 0.8568, "step": 7067 }, { "epoch": 1.9497931034482758, "grad_norm": 3.5212819576263428, "learning_rate": 7.1597851558208944e-06, "loss": 0.6072, "step": 7068 }, { "epoch": 1.9500689655172414, "grad_norm": 3.8382365703582764, "learning_rate": 7.158962322757773e-06, "loss": 0.7226, "step": 7069 }, { "epoch": 1.950344827586207, "grad_norm": 3.79532790184021, "learning_rate": 7.158139417819397e-06, "loss": 0.7107, "step": 7070 }, { "epoch": 1.9506206896551723, "grad_norm": 3.7747342586517334, "learning_rate": 7.157316441033163e-06, "loss": 0.8731, "step": 7071 }, { "epoch": 1.950896551724138, "grad_norm": 4.112882614135742, "learning_rate": 7.1564933924264664e-06, "loss": 0.74, "step": 7072 }, { "epoch": 1.9511724137931035, "grad_norm": 4.189523220062256, "learning_rate": 7.155670272026709e-06, "loss": 0.7628, "step": 7073 }, { "epoch": 1.9514482758620688, "grad_norm": 4.026365756988525, "learning_rate": 7.154847079861294e-06, "loss": 0.8561, "step": 7074 }, { "epoch": 1.9517241379310346, "grad_norm": 3.745900869369507, "learning_rate": 7.154023815957627e-06, "loss": 0.8535, "step": 7075 }, { "epoch": 1.952, "grad_norm": 4.097027778625488, "learning_rate": 7.153200480343116e-06, "loss": 0.781, "step": 7076 }, { "epoch": 1.9522758620689655, "grad_norm": 4.068685531616211, "learning_rate": 7.152377073045168e-06, "loss": 0.8481, "step": 7077 }, { "epoch": 1.952551724137931, "grad_norm": 4.085955619812012, "learning_rate": 7.1515535940911995e-06, "loss": 0.8206, "step": 7078 }, { "epoch": 1.9528275862068964, "grad_norm": 3.8525474071502686, "learning_rate": 7.1507300435086245e-06, "loss": 0.7305, "step": 7079 }, { "epoch": 1.953103448275862, "grad_norm": 4.187035083770752, "learning_rate": 7.149906421324858e-06, "loss": 0.8463, "step": 7080 }, { "epoch": 1.9533793103448276, "grad_norm": 3.9610607624053955, "learning_rate": 7.149082727567323e-06, "loss": 0.7355, "step": 7081 }, { "epoch": 1.953655172413793, "grad_norm": 4.08058500289917, "learning_rate": 7.148258962263439e-06, "loss": 0.823, "step": 7082 }, { "epoch": 1.9539310344827587, "grad_norm": 3.981215000152588, "learning_rate": 7.147435125440631e-06, "loss": 0.9149, "step": 7083 }, { "epoch": 1.954206896551724, "grad_norm": 4.3457746505737305, "learning_rate": 7.146611217126329e-06, "loss": 0.8348, "step": 7084 }, { "epoch": 1.9544827586206897, "grad_norm": 3.59305739402771, "learning_rate": 7.1457872373479565e-06, "loss": 0.7453, "step": 7085 }, { "epoch": 1.9547586206896552, "grad_norm": 3.5795845985412598, "learning_rate": 7.14496318613295e-06, "loss": 0.7919, "step": 7086 }, { "epoch": 1.9550344827586206, "grad_norm": 4.400238037109375, "learning_rate": 7.14413906350874e-06, "loss": 0.8466, "step": 7087 }, { "epoch": 1.9553103448275864, "grad_norm": 3.8689286708831787, "learning_rate": 7.143314869502764e-06, "loss": 0.8574, "step": 7088 }, { "epoch": 1.9555862068965517, "grad_norm": 3.8773627281188965, "learning_rate": 7.142490604142462e-06, "loss": 0.684, "step": 7089 }, { "epoch": 1.9558620689655173, "grad_norm": 3.9876906871795654, "learning_rate": 7.141666267455274e-06, "loss": 0.7504, "step": 7090 }, { "epoch": 1.9561379310344829, "grad_norm": 3.8322904109954834, "learning_rate": 7.140841859468643e-06, "loss": 0.7721, "step": 7091 }, { "epoch": 1.9564137931034482, "grad_norm": 4.072685241699219, "learning_rate": 7.140017380210016e-06, "loss": 0.8447, "step": 7092 }, { "epoch": 1.9566896551724138, "grad_norm": 3.5777976512908936, "learning_rate": 7.13919282970684e-06, "loss": 0.8173, "step": 7093 }, { "epoch": 1.9569655172413793, "grad_norm": 3.608452081680298, "learning_rate": 7.138368207986568e-06, "loss": 0.7665, "step": 7094 }, { "epoch": 1.9572413793103447, "grad_norm": 3.9700207710266113, "learning_rate": 7.137543515076649e-06, "loss": 0.7904, "step": 7095 }, { "epoch": 1.9575172413793105, "grad_norm": 3.815054416656494, "learning_rate": 7.136718751004541e-06, "loss": 0.8124, "step": 7096 }, { "epoch": 1.9577931034482758, "grad_norm": 3.9774017333984375, "learning_rate": 7.1358939157977015e-06, "loss": 0.7863, "step": 7097 }, { "epoch": 1.9580689655172414, "grad_norm": 3.7526252269744873, "learning_rate": 7.135069009483591e-06, "loss": 0.7104, "step": 7098 }, { "epoch": 1.958344827586207, "grad_norm": 4.184681415557861, "learning_rate": 7.13424403208967e-06, "loss": 0.7992, "step": 7099 }, { "epoch": 1.9586206896551723, "grad_norm": 4.124657154083252, "learning_rate": 7.1334189836434055e-06, "loss": 0.8889, "step": 7100 }, { "epoch": 1.958896551724138, "grad_norm": 3.757807493209839, "learning_rate": 7.132593864172263e-06, "loss": 0.8383, "step": 7101 }, { "epoch": 1.9591724137931035, "grad_norm": 4.087723731994629, "learning_rate": 7.131768673703713e-06, "loss": 1.0159, "step": 7102 }, { "epoch": 1.9594482758620688, "grad_norm": 3.5890955924987793, "learning_rate": 7.1309434122652265e-06, "loss": 0.8539, "step": 7103 }, { "epoch": 1.9597241379310346, "grad_norm": 4.0436906814575195, "learning_rate": 7.130118079884279e-06, "loss": 0.7756, "step": 7104 }, { "epoch": 1.96, "grad_norm": 4.124695301055908, "learning_rate": 7.129292676588346e-06, "loss": 0.9747, "step": 7105 }, { "epoch": 1.9602758620689655, "grad_norm": 4.0425496101379395, "learning_rate": 7.128467202404907e-06, "loss": 0.8103, "step": 7106 }, { "epoch": 1.960551724137931, "grad_norm": 4.244643688201904, "learning_rate": 7.1276416573614425e-06, "loss": 0.8643, "step": 7107 }, { "epoch": 1.9608275862068965, "grad_norm": 3.6698989868164062, "learning_rate": 7.126816041485438e-06, "loss": 0.8709, "step": 7108 }, { "epoch": 1.961103448275862, "grad_norm": 4.096137523651123, "learning_rate": 7.1259903548043785e-06, "loss": 0.7752, "step": 7109 }, { "epoch": 1.9613793103448276, "grad_norm": 4.425635814666748, "learning_rate": 7.1251645973457514e-06, "loss": 0.8717, "step": 7110 }, { "epoch": 1.961655172413793, "grad_norm": 4.725991725921631, "learning_rate": 7.12433876913705e-06, "loss": 0.9565, "step": 7111 }, { "epoch": 1.9619310344827587, "grad_norm": 3.872096300125122, "learning_rate": 7.123512870205765e-06, "loss": 0.7482, "step": 7112 }, { "epoch": 1.962206896551724, "grad_norm": 3.585648536682129, "learning_rate": 7.122686900579393e-06, "loss": 0.8351, "step": 7113 }, { "epoch": 1.9624827586206897, "grad_norm": 3.9719622135162354, "learning_rate": 7.121860860285431e-06, "loss": 0.7225, "step": 7114 }, { "epoch": 1.9627586206896552, "grad_norm": 3.751990556716919, "learning_rate": 7.121034749351381e-06, "loss": 0.8031, "step": 7115 }, { "epoch": 1.9630344827586206, "grad_norm": 3.3481154441833496, "learning_rate": 7.120208567804743e-06, "loss": 0.7507, "step": 7116 }, { "epoch": 1.9633103448275864, "grad_norm": 3.4211812019348145, "learning_rate": 7.119382315673024e-06, "loss": 0.7336, "step": 7117 }, { "epoch": 1.9635862068965517, "grad_norm": 4.220773696899414, "learning_rate": 7.118555992983729e-06, "loss": 0.8527, "step": 7118 }, { "epoch": 1.9638620689655173, "grad_norm": 4.023566722869873, "learning_rate": 7.117729599764372e-06, "loss": 0.8696, "step": 7119 }, { "epoch": 1.9641379310344829, "grad_norm": 4.2150678634643555, "learning_rate": 7.116903136042459e-06, "loss": 0.8544, "step": 7120 }, { "epoch": 1.9644137931034482, "grad_norm": 4.09515905380249, "learning_rate": 7.116076601845509e-06, "loss": 0.7739, "step": 7121 }, { "epoch": 1.9646896551724138, "grad_norm": 3.73054575920105, "learning_rate": 7.115249997201036e-06, "loss": 0.8702, "step": 7122 }, { "epoch": 1.9649655172413794, "grad_norm": 4.075581073760986, "learning_rate": 7.11442332213656e-06, "loss": 0.831, "step": 7123 }, { "epoch": 1.9652413793103447, "grad_norm": 4.002837657928467, "learning_rate": 7.113596576679601e-06, "loss": 0.8044, "step": 7124 }, { "epoch": 1.9655172413793105, "grad_norm": 4.0445556640625, "learning_rate": 7.112769760857685e-06, "loss": 0.7591, "step": 7125 }, { "epoch": 1.9657931034482758, "grad_norm": 3.881300210952759, "learning_rate": 7.111942874698336e-06, "loss": 0.8669, "step": 7126 }, { "epoch": 1.9660689655172414, "grad_norm": 3.993830680847168, "learning_rate": 7.111115918229086e-06, "loss": 0.777, "step": 7127 }, { "epoch": 1.966344827586207, "grad_norm": 3.650996446609497, "learning_rate": 7.110288891477459e-06, "loss": 0.8573, "step": 7128 }, { "epoch": 1.9666206896551723, "grad_norm": 3.9509310722351074, "learning_rate": 7.109461794470994e-06, "loss": 0.9087, "step": 7129 }, { "epoch": 1.966896551724138, "grad_norm": 3.7636287212371826, "learning_rate": 7.108634627237224e-06, "loss": 0.8449, "step": 7130 }, { "epoch": 1.9671724137931035, "grad_norm": 4.190186977386475, "learning_rate": 7.107807389803687e-06, "loss": 0.6708, "step": 7131 }, { "epoch": 1.9674482758620688, "grad_norm": 4.180857181549072, "learning_rate": 7.106980082197923e-06, "loss": 0.7765, "step": 7132 }, { "epoch": 1.9677241379310346, "grad_norm": 3.77866530418396, "learning_rate": 7.106152704447473e-06, "loss": 0.7379, "step": 7133 }, { "epoch": 1.968, "grad_norm": 3.738304376602173, "learning_rate": 7.105325256579883e-06, "loss": 0.6882, "step": 7134 }, { "epoch": 1.9682758620689655, "grad_norm": 3.726914405822754, "learning_rate": 7.104497738622702e-06, "loss": 0.699, "step": 7135 }, { "epoch": 1.9685517241379311, "grad_norm": 4.292017459869385, "learning_rate": 7.103670150603475e-06, "loss": 0.9502, "step": 7136 }, { "epoch": 1.9688275862068965, "grad_norm": 3.941406011581421, "learning_rate": 7.102842492549758e-06, "loss": 0.7364, "step": 7137 }, { "epoch": 1.969103448275862, "grad_norm": 4.02473258972168, "learning_rate": 7.102014764489103e-06, "loss": 0.8569, "step": 7138 }, { "epoch": 1.9693793103448276, "grad_norm": 3.6446704864501953, "learning_rate": 7.101186966449065e-06, "loss": 0.75, "step": 7139 }, { "epoch": 1.969655172413793, "grad_norm": 4.447521209716797, "learning_rate": 7.100359098457207e-06, "loss": 0.8613, "step": 7140 }, { "epoch": 1.9699310344827587, "grad_norm": 4.289312362670898, "learning_rate": 7.099531160541085e-06, "loss": 0.7868, "step": 7141 }, { "epoch": 1.970206896551724, "grad_norm": 3.879518985748291, "learning_rate": 7.098703152728266e-06, "loss": 0.592, "step": 7142 }, { "epoch": 1.9704827586206897, "grad_norm": 3.946086883544922, "learning_rate": 7.097875075046312e-06, "loss": 0.7927, "step": 7143 }, { "epoch": 1.9707586206896552, "grad_norm": 4.1851806640625, "learning_rate": 7.097046927522796e-06, "loss": 0.8262, "step": 7144 }, { "epoch": 1.9710344827586206, "grad_norm": 4.068704128265381, "learning_rate": 7.0962187101852845e-06, "loss": 0.7723, "step": 7145 }, { "epoch": 1.9713103448275862, "grad_norm": 3.7938785552978516, "learning_rate": 7.095390423061353e-06, "loss": 0.7913, "step": 7146 }, { "epoch": 1.9715862068965517, "grad_norm": 4.129289627075195, "learning_rate": 7.0945620661785715e-06, "loss": 0.7509, "step": 7147 }, { "epoch": 1.971862068965517, "grad_norm": 3.877717971801758, "learning_rate": 7.093733639564524e-06, "loss": 0.8021, "step": 7148 }, { "epoch": 1.9721379310344829, "grad_norm": 3.9223439693450928, "learning_rate": 7.092905143246785e-06, "loss": 0.8684, "step": 7149 }, { "epoch": 1.9724137931034482, "grad_norm": 3.762894868850708, "learning_rate": 7.092076577252939e-06, "loss": 0.7746, "step": 7150 }, { "epoch": 1.9726896551724138, "grad_norm": 3.3289132118225098, "learning_rate": 7.091247941610569e-06, "loss": 0.7637, "step": 7151 }, { "epoch": 1.9729655172413794, "grad_norm": 3.869534492492676, "learning_rate": 7.090419236347261e-06, "loss": 0.8151, "step": 7152 }, { "epoch": 1.9732413793103447, "grad_norm": 3.905548572540283, "learning_rate": 7.089590461490606e-06, "loss": 0.8104, "step": 7153 }, { "epoch": 1.9735172413793105, "grad_norm": 4.175549030303955, "learning_rate": 7.0887616170681936e-06, "loss": 0.8264, "step": 7154 }, { "epoch": 1.9737931034482759, "grad_norm": 3.782796621322632, "learning_rate": 7.087932703107618e-06, "loss": 0.7481, "step": 7155 }, { "epoch": 1.9740689655172414, "grad_norm": 3.869016408920288, "learning_rate": 7.087103719636476e-06, "loss": 0.6181, "step": 7156 }, { "epoch": 1.974344827586207, "grad_norm": 4.086568355560303, "learning_rate": 7.086274666682363e-06, "loss": 0.803, "step": 7157 }, { "epoch": 1.9746206896551723, "grad_norm": 3.6009528636932373, "learning_rate": 7.0854455442728835e-06, "loss": 0.7409, "step": 7158 }, { "epoch": 1.974896551724138, "grad_norm": 4.062264442443848, "learning_rate": 7.084616352435636e-06, "loss": 0.6891, "step": 7159 }, { "epoch": 1.9751724137931035, "grad_norm": 4.1506218910217285, "learning_rate": 7.083787091198229e-06, "loss": 0.7459, "step": 7160 }, { "epoch": 1.9754482758620688, "grad_norm": 4.114179611206055, "learning_rate": 7.082957760588266e-06, "loss": 0.7692, "step": 7161 }, { "epoch": 1.9757241379310346, "grad_norm": 3.839545726776123, "learning_rate": 7.082128360633362e-06, "loss": 0.7046, "step": 7162 }, { "epoch": 1.976, "grad_norm": 3.512052059173584, "learning_rate": 7.081298891361125e-06, "loss": 0.6805, "step": 7163 }, { "epoch": 1.9762758620689655, "grad_norm": 4.096072196960449, "learning_rate": 7.0804693527991705e-06, "loss": 0.8918, "step": 7164 }, { "epoch": 1.9765517241379311, "grad_norm": 3.981706380844116, "learning_rate": 7.0796397449751155e-06, "loss": 0.8383, "step": 7165 }, { "epoch": 1.9768275862068965, "grad_norm": 3.996319055557251, "learning_rate": 7.0788100679165795e-06, "loss": 0.8076, "step": 7166 }, { "epoch": 1.977103448275862, "grad_norm": 4.222985744476318, "learning_rate": 7.077980321651182e-06, "loss": 0.7663, "step": 7167 }, { "epoch": 1.9773793103448276, "grad_norm": 3.767665147781372, "learning_rate": 7.077150506206547e-06, "loss": 0.777, "step": 7168 }, { "epoch": 1.977655172413793, "grad_norm": 3.997614860534668, "learning_rate": 7.076320621610302e-06, "loss": 0.7351, "step": 7169 }, { "epoch": 1.9779310344827588, "grad_norm": 3.685438394546509, "learning_rate": 7.075490667890074e-06, "loss": 0.6728, "step": 7170 }, { "epoch": 1.978206896551724, "grad_norm": 3.669538974761963, "learning_rate": 7.0746606450734925e-06, "loss": 0.6927, "step": 7171 }, { "epoch": 1.9784827586206897, "grad_norm": 3.9816858768463135, "learning_rate": 7.073830553188191e-06, "loss": 0.8412, "step": 7172 }, { "epoch": 1.9787586206896552, "grad_norm": 4.216297149658203, "learning_rate": 7.0730003922618065e-06, "loss": 0.8035, "step": 7173 }, { "epoch": 1.9790344827586206, "grad_norm": 4.153724193572998, "learning_rate": 7.072170162321974e-06, "loss": 0.6103, "step": 7174 }, { "epoch": 1.9793103448275862, "grad_norm": 4.131069183349609, "learning_rate": 7.071339863396333e-06, "loss": 0.788, "step": 7175 }, { "epoch": 1.9795862068965517, "grad_norm": 4.115523815155029, "learning_rate": 7.0705094955125275e-06, "loss": 0.8884, "step": 7176 }, { "epoch": 1.979862068965517, "grad_norm": 4.1286468505859375, "learning_rate": 7.069679058698201e-06, "loss": 0.691, "step": 7177 }, { "epoch": 1.9801379310344829, "grad_norm": 4.100991725921631, "learning_rate": 7.068848552980998e-06, "loss": 0.9174, "step": 7178 }, { "epoch": 1.9804137931034482, "grad_norm": 4.3963518142700195, "learning_rate": 7.068017978388569e-06, "loss": 0.9417, "step": 7179 }, { "epoch": 1.9806896551724138, "grad_norm": 4.037464618682861, "learning_rate": 7.067187334948567e-06, "loss": 0.9103, "step": 7180 }, { "epoch": 1.9809655172413794, "grad_norm": 4.069061756134033, "learning_rate": 7.066356622688641e-06, "loss": 0.8709, "step": 7181 }, { "epoch": 1.9812413793103447, "grad_norm": 3.6102309226989746, "learning_rate": 7.065525841636452e-06, "loss": 0.6554, "step": 7182 }, { "epoch": 1.9815172413793103, "grad_norm": 3.8459463119506836, "learning_rate": 7.064694991819654e-06, "loss": 0.9262, "step": 7183 }, { "epoch": 1.9817931034482759, "grad_norm": 3.8760392665863037, "learning_rate": 7.063864073265907e-06, "loss": 0.7773, "step": 7184 }, { "epoch": 1.9820689655172414, "grad_norm": 4.039271831512451, "learning_rate": 7.063033086002877e-06, "loss": 0.883, "step": 7185 }, { "epoch": 1.982344827586207, "grad_norm": 3.9324331283569336, "learning_rate": 7.0622020300582274e-06, "loss": 0.7027, "step": 7186 }, { "epoch": 1.9826206896551724, "grad_norm": 4.293285369873047, "learning_rate": 7.0613709054596235e-06, "loss": 0.8848, "step": 7187 }, { "epoch": 1.982896551724138, "grad_norm": 3.442049741744995, "learning_rate": 7.060539712234737e-06, "loss": 0.6549, "step": 7188 }, { "epoch": 1.9831724137931035, "grad_norm": 3.4368138313293457, "learning_rate": 7.059708450411238e-06, "loss": 0.6621, "step": 7189 }, { "epoch": 1.9834482758620688, "grad_norm": 3.8519771099090576, "learning_rate": 7.058877120016801e-06, "loss": 0.896, "step": 7190 }, { "epoch": 1.9837241379310346, "grad_norm": 3.7075037956237793, "learning_rate": 7.0580457210791046e-06, "loss": 0.6533, "step": 7191 }, { "epoch": 1.984, "grad_norm": 3.88105845451355, "learning_rate": 7.057214253625825e-06, "loss": 0.8081, "step": 7192 }, { "epoch": 1.9842758620689656, "grad_norm": 4.440068244934082, "learning_rate": 7.056382717684642e-06, "loss": 0.8131, "step": 7193 }, { "epoch": 1.9845517241379311, "grad_norm": 3.787811756134033, "learning_rate": 7.055551113283243e-06, "loss": 0.8378, "step": 7194 }, { "epoch": 1.9848275862068965, "grad_norm": 4.290619850158691, "learning_rate": 7.054719440449308e-06, "loss": 0.8846, "step": 7195 }, { "epoch": 1.985103448275862, "grad_norm": 4.088586807250977, "learning_rate": 7.053887699210529e-06, "loss": 0.8142, "step": 7196 }, { "epoch": 1.9853793103448276, "grad_norm": 4.056311130523682, "learning_rate": 7.053055889594593e-06, "loss": 0.8091, "step": 7197 }, { "epoch": 1.985655172413793, "grad_norm": 3.936124324798584, "learning_rate": 7.052224011629194e-06, "loss": 0.8021, "step": 7198 }, { "epoch": 1.9859310344827588, "grad_norm": 4.219254970550537, "learning_rate": 7.051392065342026e-06, "loss": 0.7598, "step": 7199 }, { "epoch": 1.986206896551724, "grad_norm": 3.854485273361206, "learning_rate": 7.050560050760786e-06, "loss": 0.7465, "step": 7200 }, { "epoch": 1.9864827586206897, "grad_norm": 4.2169060707092285, "learning_rate": 7.0497279679131735e-06, "loss": 0.9563, "step": 7201 }, { "epoch": 1.9867586206896553, "grad_norm": 3.9590532779693604, "learning_rate": 7.048895816826889e-06, "loss": 0.821, "step": 7202 }, { "epoch": 1.9870344827586206, "grad_norm": 4.090304374694824, "learning_rate": 7.048063597529637e-06, "loss": 0.7492, "step": 7203 }, { "epoch": 1.9873103448275862, "grad_norm": 4.003555774688721, "learning_rate": 7.047231310049123e-06, "loss": 0.77, "step": 7204 }, { "epoch": 1.9875862068965517, "grad_norm": 4.145306587219238, "learning_rate": 7.046398954413054e-06, "loss": 0.8719, "step": 7205 }, { "epoch": 1.987862068965517, "grad_norm": 3.8236215114593506, "learning_rate": 7.045566530649143e-06, "loss": 0.7862, "step": 7206 }, { "epoch": 1.9881379310344829, "grad_norm": 3.823350667953491, "learning_rate": 7.044734038785099e-06, "loss": 0.6952, "step": 7207 }, { "epoch": 1.9884137931034482, "grad_norm": 3.9500372409820557, "learning_rate": 7.043901478848641e-06, "loss": 0.7948, "step": 7208 }, { "epoch": 1.9886896551724138, "grad_norm": 4.113548755645752, "learning_rate": 7.043068850867483e-06, "loss": 0.8234, "step": 7209 }, { "epoch": 1.9889655172413794, "grad_norm": 3.8874897956848145, "learning_rate": 7.042236154869346e-06, "loss": 0.7818, "step": 7210 }, { "epoch": 1.9892413793103447, "grad_norm": 4.168547630310059, "learning_rate": 7.0414033908819526e-06, "loss": 0.8583, "step": 7211 }, { "epoch": 1.9895172413793103, "grad_norm": 4.135387897491455, "learning_rate": 7.0405705589330255e-06, "loss": 0.7493, "step": 7212 }, { "epoch": 1.9897931034482759, "grad_norm": 3.429952621459961, "learning_rate": 7.03973765905029e-06, "loss": 0.6555, "step": 7213 }, { "epoch": 1.9900689655172412, "grad_norm": 4.378201007843018, "learning_rate": 7.038904691261478e-06, "loss": 0.8185, "step": 7214 }, { "epoch": 1.990344827586207, "grad_norm": 4.088320255279541, "learning_rate": 7.038071655594316e-06, "loss": 0.8543, "step": 7215 }, { "epoch": 1.9906206896551724, "grad_norm": 3.8328871726989746, "learning_rate": 7.037238552076541e-06, "loss": 0.8003, "step": 7216 }, { "epoch": 1.990896551724138, "grad_norm": 4.2699785232543945, "learning_rate": 7.036405380735886e-06, "loss": 0.8223, "step": 7217 }, { "epoch": 1.9911724137931035, "grad_norm": 3.823939800262451, "learning_rate": 7.03557214160009e-06, "loss": 0.8135, "step": 7218 }, { "epoch": 1.9914482758620689, "grad_norm": 3.7805469036102295, "learning_rate": 7.03473883469689e-06, "loss": 0.7805, "step": 7219 }, { "epoch": 1.9917241379310346, "grad_norm": 4.028633117675781, "learning_rate": 7.033905460054033e-06, "loss": 0.7566, "step": 7220 }, { "epoch": 1.992, "grad_norm": 4.098030090332031, "learning_rate": 7.03307201769926e-06, "loss": 0.8113, "step": 7221 }, { "epoch": 1.9922758620689656, "grad_norm": 3.9514942169189453, "learning_rate": 7.032238507660316e-06, "loss": 0.8778, "step": 7222 }, { "epoch": 1.9925517241379311, "grad_norm": 3.4396913051605225, "learning_rate": 7.031404929964955e-06, "loss": 0.7576, "step": 7223 }, { "epoch": 1.9928275862068965, "grad_norm": 3.8882839679718018, "learning_rate": 7.030571284640922e-06, "loss": 0.7206, "step": 7224 }, { "epoch": 1.993103448275862, "grad_norm": 3.954462766647339, "learning_rate": 7.0297375717159764e-06, "loss": 0.7225, "step": 7225 }, { "epoch": 1.9933793103448276, "grad_norm": 3.2970499992370605, "learning_rate": 7.028903791217869e-06, "loss": 0.7098, "step": 7226 }, { "epoch": 1.993655172413793, "grad_norm": 3.8728408813476562, "learning_rate": 7.0280699431743605e-06, "loss": 0.692, "step": 7227 }, { "epoch": 1.9939310344827588, "grad_norm": 4.482330799102783, "learning_rate": 7.027236027613209e-06, "loss": 0.7487, "step": 7228 }, { "epoch": 1.9942068965517241, "grad_norm": 4.029712677001953, "learning_rate": 7.0264020445621796e-06, "loss": 0.8048, "step": 7229 }, { "epoch": 1.9944827586206897, "grad_norm": 3.7711424827575684, "learning_rate": 7.025567994049034e-06, "loss": 0.829, "step": 7230 }, { "epoch": 1.9947586206896553, "grad_norm": 4.005157947540283, "learning_rate": 7.024733876101542e-06, "loss": 0.8548, "step": 7231 }, { "epoch": 1.9950344827586206, "grad_norm": 4.1595892906188965, "learning_rate": 7.023899690747469e-06, "loss": 0.8466, "step": 7232 }, { "epoch": 1.9953103448275862, "grad_norm": 3.9603123664855957, "learning_rate": 7.023065438014589e-06, "loss": 0.9583, "step": 7233 }, { "epoch": 1.9955862068965518, "grad_norm": 4.002429962158203, "learning_rate": 7.0222311179306745e-06, "loss": 0.8397, "step": 7234 }, { "epoch": 1.995862068965517, "grad_norm": 3.7970669269561768, "learning_rate": 7.021396730523502e-06, "loss": 0.9169, "step": 7235 }, { "epoch": 1.996137931034483, "grad_norm": 3.9241790771484375, "learning_rate": 7.02056227582085e-06, "loss": 0.7447, "step": 7236 }, { "epoch": 1.9964137931034482, "grad_norm": 4.396383285522461, "learning_rate": 7.019727753850498e-06, "loss": 0.8577, "step": 7237 }, { "epoch": 1.9966896551724138, "grad_norm": 4.304017066955566, "learning_rate": 7.018893164640228e-06, "loss": 0.8486, "step": 7238 }, { "epoch": 1.9969655172413794, "grad_norm": 3.8428850173950195, "learning_rate": 7.018058508217827e-06, "loss": 0.8323, "step": 7239 }, { "epoch": 1.9972413793103447, "grad_norm": 3.7419228553771973, "learning_rate": 7.0172237846110794e-06, "loss": 0.6473, "step": 7240 }, { "epoch": 1.9975172413793103, "grad_norm": 3.5619852542877197, "learning_rate": 7.0163889938477754e-06, "loss": 0.7928, "step": 7241 }, { "epoch": 1.9977931034482759, "grad_norm": 3.986541748046875, "learning_rate": 7.015554135955708e-06, "loss": 0.7839, "step": 7242 }, { "epoch": 1.9980689655172412, "grad_norm": 3.8137261867523193, "learning_rate": 7.014719210962668e-06, "loss": 0.7089, "step": 7243 }, { "epoch": 1.998344827586207, "grad_norm": 4.22244119644165, "learning_rate": 7.013884218896454e-06, "loss": 0.9034, "step": 7244 }, { "epoch": 1.9986206896551724, "grad_norm": 3.6880364418029785, "learning_rate": 7.013049159784863e-06, "loss": 0.7571, "step": 7245 }, { "epoch": 1.998896551724138, "grad_norm": 4.121937274932861, "learning_rate": 7.012214033655694e-06, "loss": 0.9978, "step": 7246 }, { "epoch": 1.9991724137931035, "grad_norm": 4.079460620880127, "learning_rate": 7.0113788405367536e-06, "loss": 0.7805, "step": 7247 }, { "epoch": 1.9994482758620689, "grad_norm": 3.8698272705078125, "learning_rate": 7.0105435804558445e-06, "loss": 0.8523, "step": 7248 }, { "epoch": 1.9997241379310344, "grad_norm": 3.7464234828948975, "learning_rate": 7.0097082534407716e-06, "loss": 0.8045, "step": 7249 }, { "epoch": 2.0, "grad_norm": 3.4739015102386475, "learning_rate": 7.008872859519348e-06, "loss": 0.9084, "step": 7250 } ], "logging_steps": 1.0, "max_steps": 18125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.2740901683716424e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }