{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-300", "epoch": 0.16387286197125397, "eval_steps": 300, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001365607183093783, "grad_norm": 9.676669833424967e-06, "learning_rate": 2e-05, "loss": 46.0, "step": 1 }, { "epoch": 0.0001365607183093783, "eval_loss": 11.5, "eval_runtime": 20.0819, "eval_samples_per_second": 153.571, "eval_steps_per_second": 76.786, "step": 1 }, { "epoch": 0.0002731214366187566, "grad_norm": 8.113798685371876e-06, "learning_rate": 4e-05, "loss": 46.0, "step": 2 }, { "epoch": 0.0004096821549281349, "grad_norm": 4.353820713731693e-06, "learning_rate": 6e-05, "loss": 46.0, "step": 3 }, { "epoch": 0.0005462428732375132, "grad_norm": 5.62772765988484e-06, "learning_rate": 8e-05, "loss": 46.0, "step": 4 }, { "epoch": 0.0006828035915468915, "grad_norm": 6.43259363641846e-06, "learning_rate": 0.0001, "loss": 46.0, "step": 5 }, { "epoch": 0.0008193643098562698, "grad_norm": 9.933234650816303e-06, "learning_rate": 0.00012, "loss": 46.0, "step": 6 }, { "epoch": 0.0009559250281656481, "grad_norm": 6.729349024681142e-06, "learning_rate": 0.00014, "loss": 46.0, "step": 7 }, { "epoch": 0.0010924857464750264, "grad_norm": 7.317645668081241e-06, "learning_rate": 0.00016, "loss": 46.0, "step": 8 }, { "epoch": 0.0012290464647844047, "grad_norm": 7.1731265052221715e-06, "learning_rate": 0.00018, "loss": 46.0, "step": 9 }, { "epoch": 0.001365607183093783, "grad_norm": 7.621179065608885e-06, "learning_rate": 0.0002, "loss": 46.0, "step": 10 }, { "epoch": 0.0015021679014031613, "grad_norm": 1.141078882938018e-05, "learning_rate": 0.00019999994480149276, "loss": 46.0, "step": 11 }, { "epoch": 0.0016387286197125396, "grad_norm": 9.140413567365613e-06, "learning_rate": 0.00019999977920603197, "loss": 46.0, "step": 12 }, { "epoch": 0.001775289338021918, "grad_norm": 1.240484562003985e-05, "learning_rate": 0.0001999995032138004, "loss": 46.0, "step": 13 }, { "epoch": 0.0019118500563312963, "grad_norm": 1.1790569260483608e-05, "learning_rate": 0.00019999911682510278, "loss": 46.0, "step": 14 }, { "epoch": 0.0020484107746406746, "grad_norm": 1.051307117450051e-05, "learning_rate": 0.00019999862004036568, "loss": 46.0, "step": 15 }, { "epoch": 0.0021849714929500527, "grad_norm": 9.398099791724235e-06, "learning_rate": 0.0001999980128601375, "loss": 46.0, "step": 16 }, { "epoch": 0.0023215322112594312, "grad_norm": 7.582193120470038e-06, "learning_rate": 0.00019999729528508855, "loss": 46.0, "step": 17 }, { "epoch": 0.0024580929295688093, "grad_norm": 1.940154834301211e-05, "learning_rate": 0.00019999646731601103, "loss": 46.0, "step": 18 }, { "epoch": 0.002594653647878188, "grad_norm": 7.919963536551222e-06, "learning_rate": 0.00019999552895381902, "loss": 46.0, "step": 19 }, { "epoch": 0.002731214366187566, "grad_norm": 1.3096555449010339e-05, "learning_rate": 0.0001999944801995484, "loss": 46.0, "step": 20 }, { "epoch": 0.0028677750844969445, "grad_norm": 1.2133096788602415e-05, "learning_rate": 0.00019999332105435696, "loss": 46.0, "step": 21 }, { "epoch": 0.0030043358028063226, "grad_norm": 3.7125832022866234e-05, "learning_rate": 0.00019999205151952437, "loss": 46.0, "step": 22 }, { "epoch": 0.003140896521115701, "grad_norm": 1.1414869732107036e-05, "learning_rate": 0.0001999906715964522, "loss": 46.0, "step": 23 }, { "epoch": 0.0032774572394250793, "grad_norm": 1.2319793313508853e-05, "learning_rate": 0.0001999891812866638, "loss": 46.0, "step": 24 }, { "epoch": 0.003414017957734458, "grad_norm": 1.1083939170930535e-05, "learning_rate": 0.00019998758059180447, "loss": 46.0, "step": 25 }, { "epoch": 0.003550578676043836, "grad_norm": 1.0406022738607135e-05, "learning_rate": 0.00019998586951364125, "loss": 46.0, "step": 26 }, { "epoch": 0.0036871393943532144, "grad_norm": 8.916207661968656e-06, "learning_rate": 0.0001999840480540632, "loss": 46.0, "step": 27 }, { "epoch": 0.0038237001126625926, "grad_norm": 8.375522156711668e-06, "learning_rate": 0.0001999821162150811, "loss": 46.0, "step": 28 }, { "epoch": 0.003960260830971971, "grad_norm": 5.647367288474925e-06, "learning_rate": 0.00019998007399882765, "loss": 46.0, "step": 29 }, { "epoch": 0.004096821549281349, "grad_norm": 2.2721666027791798e-05, "learning_rate": 0.00019997792140755746, "loss": 46.0, "step": 30 }, { "epoch": 0.004233382267590727, "grad_norm": 1.8854540030588396e-05, "learning_rate": 0.00019997565844364688, "loss": 46.0, "step": 31 }, { "epoch": 0.004369942985900105, "grad_norm": 8.220870768127497e-06, "learning_rate": 0.00019997328510959413, "loss": 46.0, "step": 32 }, { "epoch": 0.004506503704209484, "grad_norm": 7.226680736494018e-06, "learning_rate": 0.00019997080140801932, "loss": 46.0, "step": 33 }, { "epoch": 0.0046430644225188625, "grad_norm": 4.141399131185608e-06, "learning_rate": 0.0001999682073416644, "loss": 46.0, "step": 34 }, { "epoch": 0.004779625140828241, "grad_norm": 1.651368074817583e-05, "learning_rate": 0.00019996550291339311, "loss": 46.0, "step": 35 }, { "epoch": 0.004916185859137619, "grad_norm": 2.9052245736238547e-05, "learning_rate": 0.00019996268812619107, "loss": 46.0, "step": 36 }, { "epoch": 0.005052746577446998, "grad_norm": 9.355511792819016e-06, "learning_rate": 0.00019995976298316576, "loss": 46.0, "step": 37 }, { "epoch": 0.005189307295756376, "grad_norm": 1.852245804911945e-05, "learning_rate": 0.00019995672748754638, "loss": 46.0, "step": 38 }, { "epoch": 0.005325868014065754, "grad_norm": 2.2482845452032052e-05, "learning_rate": 0.0001999535816426841, "loss": 46.0, "step": 39 }, { "epoch": 0.005462428732375132, "grad_norm": 4.135522249271162e-05, "learning_rate": 0.0001999503254520518, "loss": 46.0, "step": 40 }, { "epoch": 0.005598989450684511, "grad_norm": 4.387225635582581e-05, "learning_rate": 0.0001999469589192442, "loss": 46.0, "step": 41 }, { "epoch": 0.005735550168993889, "grad_norm": 1.1143504707433749e-05, "learning_rate": 0.00019994348204797788, "loss": 46.0, "step": 42 }, { "epoch": 0.005872110887303267, "grad_norm": 1.7061322068911977e-05, "learning_rate": 0.00019993989484209118, "loss": 46.0, "step": 43 }, { "epoch": 0.006008671605612645, "grad_norm": 3.7752193748019636e-05, "learning_rate": 0.0001999361973055443, "loss": 46.0, "step": 44 }, { "epoch": 0.006145232323922024, "grad_norm": 2.867165494535584e-05, "learning_rate": 0.0001999323894424192, "loss": 46.0, "step": 45 }, { "epoch": 0.006281793042231402, "grad_norm": 1.418732153979363e-05, "learning_rate": 0.0001999284712569196, "loss": 46.0, "step": 46 }, { "epoch": 0.00641835376054078, "grad_norm": 5.592720117419958e-05, "learning_rate": 0.00019992444275337114, "loss": 46.0, "step": 47 }, { "epoch": 0.0065549144788501585, "grad_norm": 7.95181404100731e-05, "learning_rate": 0.0001999203039362211, "loss": 46.0, "step": 48 }, { "epoch": 0.006691475197159537, "grad_norm": 2.4339618903468363e-05, "learning_rate": 0.00019991605481003866, "loss": 46.0, "step": 49 }, { "epoch": 0.006828035915468916, "grad_norm": 3.441837543505244e-05, "learning_rate": 0.00019991169537951468, "loss": 46.0, "step": 50 }, { "epoch": 0.006964596633778294, "grad_norm": 1.7208472854690626e-05, "learning_rate": 0.0001999072256494619, "loss": 46.0, "step": 51 }, { "epoch": 0.007101157352087672, "grad_norm": 1.2097309991077054e-05, "learning_rate": 0.00019990264562481472, "loss": 46.0, "step": 52 }, { "epoch": 0.00723771807039705, "grad_norm": 1.3584556654677726e-05, "learning_rate": 0.00019989795531062936, "loss": 46.0, "step": 53 }, { "epoch": 0.007374278788706429, "grad_norm": 2.912199306592811e-05, "learning_rate": 0.00019989315471208378, "loss": 46.0, "step": 54 }, { "epoch": 0.007510839507015807, "grad_norm": 2.0452795070013963e-05, "learning_rate": 0.00019988824383447776, "loss": 46.0, "step": 55 }, { "epoch": 0.007647400225325185, "grad_norm": 1.590143256180454e-05, "learning_rate": 0.00019988322268323268, "loss": 46.0, "step": 56 }, { "epoch": 0.007783960943634563, "grad_norm": 1.094182789529441e-05, "learning_rate": 0.00019987809126389177, "loss": 46.0, "step": 57 }, { "epoch": 0.007920521661943942, "grad_norm": 1.5657904441468418e-05, "learning_rate": 0.00019987284958211996, "loss": 46.0, "step": 58 }, { "epoch": 0.00805708238025332, "grad_norm": 1.1172323866048828e-05, "learning_rate": 0.00019986749764370392, "loss": 46.0, "step": 59 }, { "epoch": 0.008193643098562698, "grad_norm": 1.1931785593333188e-05, "learning_rate": 0.00019986203545455203, "loss": 46.0, "step": 60 }, { "epoch": 0.008330203816872076, "grad_norm": 2.3814011001377366e-05, "learning_rate": 0.0001998564630206944, "loss": 46.0, "step": 61 }, { "epoch": 0.008466764535181455, "grad_norm": 1.1059896678489167e-05, "learning_rate": 0.0001998507803482828, "loss": 46.0, "step": 62 }, { "epoch": 0.008603325253490833, "grad_norm": 1.2055512343067676e-05, "learning_rate": 0.00019984498744359075, "loss": 46.0, "step": 63 }, { "epoch": 0.00873988597180021, "grad_norm": 3.0810657335678115e-05, "learning_rate": 0.00019983908431301343, "loss": 46.0, "step": 64 }, { "epoch": 0.00887644669010959, "grad_norm": 2.3847031116019934e-05, "learning_rate": 0.0001998330709630677, "loss": 46.0, "step": 65 }, { "epoch": 0.009013007408418969, "grad_norm": 2.5676057703094557e-05, "learning_rate": 0.0001998269474003922, "loss": 46.0, "step": 66 }, { "epoch": 0.009149568126728347, "grad_norm": 1.9099008568446152e-05, "learning_rate": 0.0001998207136317471, "loss": 46.0, "step": 67 }, { "epoch": 0.009286128845037725, "grad_norm": 1.620809234736953e-05, "learning_rate": 0.00019981436966401425, "loss": 46.0, "step": 68 }, { "epoch": 0.009422689563347103, "grad_norm": 2.771842628135346e-05, "learning_rate": 0.00019980791550419728, "loss": 46.0, "step": 69 }, { "epoch": 0.009559250281656481, "grad_norm": 1.6155812772922218e-05, "learning_rate": 0.00019980135115942136, "loss": 46.0, "step": 70 }, { "epoch": 0.00969581099996586, "grad_norm": 1.4296605513663962e-05, "learning_rate": 0.00019979467663693332, "loss": 46.0, "step": 71 }, { "epoch": 0.009832371718275237, "grad_norm": 2.5244138669222593e-05, "learning_rate": 0.00019978789194410168, "loss": 46.0, "step": 72 }, { "epoch": 0.009968932436584617, "grad_norm": 2.8768767151632346e-05, "learning_rate": 0.00019978099708841646, "loss": 46.0, "step": 73 }, { "epoch": 0.010105493154893995, "grad_norm": 2.3254493498825468e-05, "learning_rate": 0.00019977399207748943, "loss": 46.0, "step": 74 }, { "epoch": 0.010242053873203373, "grad_norm": 2.780866634566337e-05, "learning_rate": 0.00019976687691905393, "loss": 46.0, "step": 75 }, { "epoch": 0.010378614591512752, "grad_norm": 9.848828085523564e-06, "learning_rate": 0.00019975965162096485, "loss": 46.0, "step": 76 }, { "epoch": 0.01051517530982213, "grad_norm": 1.7771888451534323e-05, "learning_rate": 0.00019975231619119867, "loss": 46.0, "step": 77 }, { "epoch": 0.010651736028131508, "grad_norm": 2.449357634759508e-05, "learning_rate": 0.00019974487063785355, "loss": 46.0, "step": 78 }, { "epoch": 0.010788296746440886, "grad_norm": 2.3659646103624254e-05, "learning_rate": 0.00019973731496914914, "loss": 46.0, "step": 79 }, { "epoch": 0.010924857464750264, "grad_norm": 4.919664206681773e-05, "learning_rate": 0.00019972964919342663, "loss": 46.0, "step": 80 }, { "epoch": 0.011061418183059642, "grad_norm": 3.88891676266212e-05, "learning_rate": 0.00019972187331914886, "loss": 46.0, "step": 81 }, { "epoch": 0.011197978901369022, "grad_norm": 5.824596883030608e-05, "learning_rate": 0.00019971398735490014, "loss": 46.0, "step": 82 }, { "epoch": 0.0113345396196784, "grad_norm": 2.443828088871669e-05, "learning_rate": 0.00019970599130938633, "loss": 46.0, "step": 83 }, { "epoch": 0.011471100337987778, "grad_norm": 1.806908221624326e-05, "learning_rate": 0.0001996978851914349, "loss": 46.0, "step": 84 }, { "epoch": 0.011607661056297156, "grad_norm": 1.9875111320288852e-05, "learning_rate": 0.00019968966900999464, "loss": 46.0, "step": 85 }, { "epoch": 0.011744221774606534, "grad_norm": 2.811396734614391e-05, "learning_rate": 0.00019968134277413606, "loss": 46.0, "step": 86 }, { "epoch": 0.011880782492915912, "grad_norm": 2.5689738322398625e-05, "learning_rate": 0.00019967290649305103, "loss": 46.0, "step": 87 }, { "epoch": 0.01201734321122529, "grad_norm": 4.2773946915986016e-05, "learning_rate": 0.00019966436017605297, "loss": 46.0, "step": 88 }, { "epoch": 0.012153903929534669, "grad_norm": 0.00021493734675459564, "learning_rate": 0.00019965570383257677, "loss": 46.0, "step": 89 }, { "epoch": 0.012290464647844048, "grad_norm": 0.00024037304683588445, "learning_rate": 0.00019964693747217874, "loss": 46.0, "step": 90 }, { "epoch": 0.012427025366153427, "grad_norm": 4.650273695006035e-05, "learning_rate": 0.00019963806110453672, "loss": 46.0, "step": 91 }, { "epoch": 0.012563586084462805, "grad_norm": 2.4313370886375196e-05, "learning_rate": 0.00019962907473944995, "loss": 46.0, "step": 92 }, { "epoch": 0.012700146802772183, "grad_norm": 1.7518057575216517e-05, "learning_rate": 0.00019961997838683905, "loss": 46.0, "step": 93 }, { "epoch": 0.01283670752108156, "grad_norm": 3.366146847838536e-05, "learning_rate": 0.00019961077205674622, "loss": 46.0, "step": 94 }, { "epoch": 0.012973268239390939, "grad_norm": 4.310454460210167e-05, "learning_rate": 0.00019960145575933486, "loss": 46.0, "step": 95 }, { "epoch": 0.013109828957700317, "grad_norm": 4.665861342800781e-05, "learning_rate": 0.00019959202950489, "loss": 46.0, "step": 96 }, { "epoch": 0.013246389676009695, "grad_norm": 2.579794090706855e-05, "learning_rate": 0.00019958249330381787, "loss": 46.0, "step": 97 }, { "epoch": 0.013382950394319073, "grad_norm": 0.00010568476136540994, "learning_rate": 0.00019957284716664618, "loss": 46.0, "step": 98 }, { "epoch": 0.013519511112628453, "grad_norm": 3.9795751945348457e-05, "learning_rate": 0.00019956309110402397, "loss": 46.0, "step": 99 }, { "epoch": 0.013656071830937831, "grad_norm": 2.9642118533956818e-05, "learning_rate": 0.00019955322512672162, "loss": 46.0, "step": 100 }, { "epoch": 0.01379263254924721, "grad_norm": 1.9580385924200527e-05, "learning_rate": 0.00019954324924563089, "loss": 46.0, "step": 101 }, { "epoch": 0.013929193267556587, "grad_norm": 3.410055433050729e-05, "learning_rate": 0.00019953316347176488, "loss": 46.0, "step": 102 }, { "epoch": 0.014065753985865966, "grad_norm": 5.128211705596186e-05, "learning_rate": 0.00019952296781625795, "loss": 46.0, "step": 103 }, { "epoch": 0.014202314704175344, "grad_norm": 3.538643795764074e-05, "learning_rate": 0.0001995126622903658, "loss": 46.0, "step": 104 }, { "epoch": 0.014338875422484722, "grad_norm": 2.4323409888893366e-05, "learning_rate": 0.00019950224690546545, "loss": 46.0, "step": 105 }, { "epoch": 0.0144754361407941, "grad_norm": 3.726944123627618e-05, "learning_rate": 0.00019949172167305516, "loss": 46.0, "step": 106 }, { "epoch": 0.01461199685910348, "grad_norm": 3.0351222449098714e-05, "learning_rate": 0.00019948108660475445, "loss": 46.0, "step": 107 }, { "epoch": 0.014748557577412858, "grad_norm": 3.8205947930691764e-05, "learning_rate": 0.0001994703417123042, "loss": 46.0, "step": 108 }, { "epoch": 0.014885118295722236, "grad_norm": 3.468756040092558e-05, "learning_rate": 0.00019945948700756633, "loss": 46.0, "step": 109 }, { "epoch": 0.015021679014031614, "grad_norm": 2.4972347091534175e-05, "learning_rate": 0.00019944852250252418, "loss": 46.0, "step": 110 }, { "epoch": 0.015158239732340992, "grad_norm": 2.8265107175684534e-05, "learning_rate": 0.00019943744820928222, "loss": 46.0, "step": 111 }, { "epoch": 0.01529480045065037, "grad_norm": 2.0429773940122686e-05, "learning_rate": 0.00019942626414006615, "loss": 46.0, "step": 112 }, { "epoch": 0.015431361168959748, "grad_norm": 3.450764052104205e-05, "learning_rate": 0.00019941497030722286, "loss": 46.0, "step": 113 }, { "epoch": 0.015567921887269126, "grad_norm": 4.498309499467723e-05, "learning_rate": 0.00019940356672322037, "loss": 46.0, "step": 114 }, { "epoch": 0.015704482605578506, "grad_norm": 2.8786233087885194e-05, "learning_rate": 0.00019939205340064792, "loss": 46.0, "step": 115 }, { "epoch": 0.015841043323887884, "grad_norm": 3.189581184415147e-05, "learning_rate": 0.00019938043035221586, "loss": 46.0, "step": 116 }, { "epoch": 0.015977604042197262, "grad_norm": 4.335124685894698e-05, "learning_rate": 0.0001993686975907557, "loss": 46.0, "step": 117 }, { "epoch": 0.01611416476050664, "grad_norm": 4.708675987785682e-05, "learning_rate": 0.00019935685512922007, "loss": 46.0, "step": 118 }, { "epoch": 0.01625072547881602, "grad_norm": 3.432563244132325e-05, "learning_rate": 0.00019934490298068264, "loss": 46.0, "step": 119 }, { "epoch": 0.016387286197125397, "grad_norm": 4.133255788474344e-05, "learning_rate": 0.0001993328411583383, "loss": 46.0, "step": 120 }, { "epoch": 0.016523846915434775, "grad_norm": 2.959890480269678e-05, "learning_rate": 0.00019932066967550289, "loss": 46.0, "step": 121 }, { "epoch": 0.016660407633744153, "grad_norm": 5.350433275452815e-05, "learning_rate": 0.0001993083885456134, "loss": 46.0, "step": 122 }, { "epoch": 0.01679696835205353, "grad_norm": 2.8684948119916953e-05, "learning_rate": 0.0001992959977822278, "loss": 46.0, "step": 123 }, { "epoch": 0.01693352907036291, "grad_norm": 4.703548984252848e-05, "learning_rate": 0.0001992834973990251, "loss": 46.0, "step": 124 }, { "epoch": 0.017070089788672287, "grad_norm": 2.6726818759925663e-05, "learning_rate": 0.0001992708874098054, "loss": 46.0, "step": 125 }, { "epoch": 0.017206650506981665, "grad_norm": 3.8927741115912795e-05, "learning_rate": 0.00019925816782848975, "loss": 46.0, "step": 126 }, { "epoch": 0.017343211225291044, "grad_norm": 8.123937004711479e-05, "learning_rate": 0.00019924533866912017, "loss": 46.0, "step": 127 }, { "epoch": 0.01747977194360042, "grad_norm": 4.269002965884283e-05, "learning_rate": 0.00019923239994585967, "loss": 46.0, "step": 128 }, { "epoch": 0.017616332661909803, "grad_norm": 6.259434303501621e-05, "learning_rate": 0.0001992193516729922, "loss": 46.0, "step": 129 }, { "epoch": 0.01775289338021918, "grad_norm": 5.1137911214027554e-05, "learning_rate": 0.0001992061938649227, "loss": 46.0, "step": 130 }, { "epoch": 0.01788945409852856, "grad_norm": 8.54436366353184e-05, "learning_rate": 0.00019919292653617694, "loss": 46.0, "step": 131 }, { "epoch": 0.018026014816837937, "grad_norm": 3.590865890146233e-05, "learning_rate": 0.00019917954970140173, "loss": 46.0, "step": 132 }, { "epoch": 0.018162575535147316, "grad_norm": 3.2492869650013745e-05, "learning_rate": 0.00019916606337536466, "loss": 46.0, "step": 133 }, { "epoch": 0.018299136253456694, "grad_norm": 8.628293289802969e-05, "learning_rate": 0.00019915246757295417, "loss": 46.0, "step": 134 }, { "epoch": 0.018435696971766072, "grad_norm": 7.722469308646396e-05, "learning_rate": 0.00019913876230917975, "loss": 46.0, "step": 135 }, { "epoch": 0.01857225769007545, "grad_norm": 4.240256384946406e-05, "learning_rate": 0.00019912494759917148, "loss": 46.0, "step": 136 }, { "epoch": 0.018708818408384828, "grad_norm": 4.052065560244955e-05, "learning_rate": 0.00019911102345818046, "loss": 46.0, "step": 137 }, { "epoch": 0.018845379126694206, "grad_norm": 7.86216405685991e-05, "learning_rate": 0.00019909698990157852, "loss": 46.0, "step": 138 }, { "epoch": 0.018981939845003584, "grad_norm": 0.00010640006075846031, "learning_rate": 0.00019908284694485827, "loss": 46.0, "step": 139 }, { "epoch": 0.019118500563312962, "grad_norm": 0.00015501640154980123, "learning_rate": 0.00019906859460363307, "loss": 46.0, "step": 140 }, { "epoch": 0.01925506128162234, "grad_norm": 7.59594258852303e-05, "learning_rate": 0.00019905423289363715, "loss": 46.0, "step": 141 }, { "epoch": 0.01939162199993172, "grad_norm": 5.662858529831283e-05, "learning_rate": 0.0001990397618307254, "loss": 46.0, "step": 142 }, { "epoch": 0.019528182718241097, "grad_norm": 3.2056228519650176e-05, "learning_rate": 0.00019902518143087342, "loss": 46.0, "step": 143 }, { "epoch": 0.019664743436550475, "grad_norm": 6.27958943368867e-05, "learning_rate": 0.00019901049171017752, "loss": 46.0, "step": 144 }, { "epoch": 0.019801304154859853, "grad_norm": 8.007367432583123e-05, "learning_rate": 0.00019899569268485472, "loss": 46.0, "step": 145 }, { "epoch": 0.019937864873169234, "grad_norm": 6.982972263358533e-05, "learning_rate": 0.00019898078437124276, "loss": 46.0, "step": 146 }, { "epoch": 0.020074425591478613, "grad_norm": 4.9886282795341685e-05, "learning_rate": 0.0001989657667857999, "loss": 46.0, "step": 147 }, { "epoch": 0.02021098630978799, "grad_norm": 9.474890248384327e-05, "learning_rate": 0.0001989506399451051, "loss": 46.0, "step": 148 }, { "epoch": 0.02034754702809737, "grad_norm": 0.00012795208021998405, "learning_rate": 0.00019893540386585804, "loss": 46.0, "step": 149 }, { "epoch": 0.020484107746406747, "grad_norm": 8.636638813186437e-05, "learning_rate": 0.00019892005856487878, "loss": 46.0, "step": 150 }, { "epoch": 0.020620668464716125, "grad_norm": 5.324094672687352e-05, "learning_rate": 0.00019890460405910815, "loss": 46.0, "step": 151 }, { "epoch": 0.020757229183025503, "grad_norm": 5.794732351205312e-05, "learning_rate": 0.00019888904036560745, "loss": 46.0, "step": 152 }, { "epoch": 0.02089378990133488, "grad_norm": 6.407459295587614e-05, "learning_rate": 0.0001988733675015585, "loss": 46.0, "step": 153 }, { "epoch": 0.02103035061964426, "grad_norm": 5.669236270477995e-05, "learning_rate": 0.00019885758548426367, "loss": 46.0, "step": 154 }, { "epoch": 0.021166911337953637, "grad_norm": 8.20392815512605e-05, "learning_rate": 0.0001988416943311459, "loss": 46.0, "step": 155 }, { "epoch": 0.021303472056263015, "grad_norm": 4.120526136830449e-05, "learning_rate": 0.00019882569405974852, "loss": 46.0, "step": 156 }, { "epoch": 0.021440032774572394, "grad_norm": 6.0489343013614416e-05, "learning_rate": 0.0001988095846877353, "loss": 46.0, "step": 157 }, { "epoch": 0.02157659349288177, "grad_norm": 5.2780691476073116e-05, "learning_rate": 0.00019879336623289056, "loss": 46.0, "step": 158 }, { "epoch": 0.02171315421119115, "grad_norm": 7.062828808557242e-05, "learning_rate": 0.00019877703871311903, "loss": 46.0, "step": 159 }, { "epoch": 0.021849714929500528, "grad_norm": 8.607962081441656e-05, "learning_rate": 0.00019876060214644566, "loss": 46.0, "step": 160 }, { "epoch": 0.021986275647809906, "grad_norm": 5.568426786339842e-05, "learning_rate": 0.0001987440565510161, "loss": 46.0, "step": 161 }, { "epoch": 0.022122836366119284, "grad_norm": 2.0496960132732056e-05, "learning_rate": 0.00019872740194509607, "loss": 46.0, "step": 162 }, { "epoch": 0.022259397084428666, "grad_norm": 0.0001443786604795605, "learning_rate": 0.0001987106383470718, "loss": 46.0, "step": 163 }, { "epoch": 0.022395957802738044, "grad_norm": 0.00022565714607480913, "learning_rate": 0.00019869376577544984, "loss": 46.0, "step": 164 }, { "epoch": 0.022532518521047422, "grad_norm": 4.196175359538756e-05, "learning_rate": 0.00019867678424885692, "loss": 46.0, "step": 165 }, { "epoch": 0.0226690792393568, "grad_norm": 5.022220284445211e-05, "learning_rate": 0.0001986596937860402, "loss": 46.0, "step": 166 }, { "epoch": 0.022805639957666178, "grad_norm": 6.462670717155561e-05, "learning_rate": 0.00019864249440586704, "loss": 46.0, "step": 167 }, { "epoch": 0.022942200675975556, "grad_norm": 5.396630149334669e-05, "learning_rate": 0.00019862518612732502, "loss": 46.0, "step": 168 }, { "epoch": 0.023078761394284934, "grad_norm": 7.057916081976146e-05, "learning_rate": 0.00019860776896952201, "loss": 46.0, "step": 169 }, { "epoch": 0.023215322112594312, "grad_norm": 2.83908757410245e-05, "learning_rate": 0.00019859024295168593, "loss": 46.0, "step": 170 }, { "epoch": 0.02335188283090369, "grad_norm": 5.459811654873192e-05, "learning_rate": 0.0001985726080931651, "loss": 46.0, "step": 171 }, { "epoch": 0.02348844354921307, "grad_norm": 9.622200013836846e-05, "learning_rate": 0.0001985548644134278, "loss": 46.0, "step": 172 }, { "epoch": 0.023625004267522447, "grad_norm": 3.8583631976507604e-05, "learning_rate": 0.00019853701193206256, "loss": 46.0, "step": 173 }, { "epoch": 0.023761564985831825, "grad_norm": 1.6933201550273225e-05, "learning_rate": 0.00019851905066877796, "loss": 46.0, "step": 174 }, { "epoch": 0.023898125704141203, "grad_norm": 0.00014548096805810928, "learning_rate": 0.0001985009806434027, "loss": 46.0, "step": 175 }, { "epoch": 0.02403468642245058, "grad_norm": 3.331439438625239e-05, "learning_rate": 0.00019848280187588556, "loss": 46.0, "step": 176 }, { "epoch": 0.02417124714075996, "grad_norm": 0.00010656285303412005, "learning_rate": 0.00019846451438629536, "loss": 46.0, "step": 177 }, { "epoch": 0.024307807859069337, "grad_norm": 5.0110294978367165e-05, "learning_rate": 0.00019844611819482095, "loss": 46.0, "step": 178 }, { "epoch": 0.024444368577378715, "grad_norm": 8.202512981370091e-05, "learning_rate": 0.00019842761332177115, "loss": 46.0, "step": 179 }, { "epoch": 0.024580929295688097, "grad_norm": 4.755376357934438e-05, "learning_rate": 0.00019840899978757485, "loss": 46.0, "step": 180 }, { "epoch": 0.024717490013997475, "grad_norm": 7.185702270362526e-05, "learning_rate": 0.0001983902776127807, "loss": 46.0, "step": 181 }, { "epoch": 0.024854050732306853, "grad_norm": 4.723266465589404e-05, "learning_rate": 0.00019837144681805757, "loss": 46.0, "step": 182 }, { "epoch": 0.02499061145061623, "grad_norm": 0.00011025248386431485, "learning_rate": 0.000198352507424194, "loss": 46.0, "step": 183 }, { "epoch": 0.02512717216892561, "grad_norm": 0.00010296511027263477, "learning_rate": 0.00019833345945209857, "loss": 46.0, "step": 184 }, { "epoch": 0.025263732887234987, "grad_norm": 7.570455636596307e-05, "learning_rate": 0.00019831430292279966, "loss": 46.0, "step": 185 }, { "epoch": 0.025400293605544366, "grad_norm": 8.105228334898129e-05, "learning_rate": 0.0001982950378574455, "loss": 46.0, "step": 186 }, { "epoch": 0.025536854323853744, "grad_norm": 0.00013544574903789908, "learning_rate": 0.00019827566427730412, "loss": 46.0, "step": 187 }, { "epoch": 0.02567341504216312, "grad_norm": 6.9964567956049e-05, "learning_rate": 0.00019825618220376342, "loss": 46.0, "step": 188 }, { "epoch": 0.0258099757604725, "grad_norm": 9.811633208300918e-05, "learning_rate": 0.00019823659165833102, "loss": 46.0, "step": 189 }, { "epoch": 0.025946536478781878, "grad_norm": 0.00023017756757326424, "learning_rate": 0.00019821689266263427, "loss": 46.0, "step": 190 }, { "epoch": 0.026083097197091256, "grad_norm": 0.0003564099024515599, "learning_rate": 0.0001981970852384203, "loss": 46.0, "step": 191 }, { "epoch": 0.026219657915400634, "grad_norm": 5.261121259536594e-05, "learning_rate": 0.00019817716940755586, "loss": 46.0, "step": 192 }, { "epoch": 0.026356218633710012, "grad_norm": 0.00012212673027534038, "learning_rate": 0.00019815714519202753, "loss": 46.0, "step": 193 }, { "epoch": 0.02649277935201939, "grad_norm": 7.185459253378212e-05, "learning_rate": 0.00019813701261394136, "loss": 46.0, "step": 194 }, { "epoch": 0.02662934007032877, "grad_norm": 0.00010008271055994555, "learning_rate": 0.00019811677169552313, "loss": 46.0, "step": 195 }, { "epoch": 0.026765900788638147, "grad_norm": 0.00010963875683955848, "learning_rate": 0.0001980964224591183, "loss": 46.0, "step": 196 }, { "epoch": 0.026902461506947528, "grad_norm": 5.4636468121316284e-05, "learning_rate": 0.00019807596492719167, "loss": 46.0, "step": 197 }, { "epoch": 0.027039022225256906, "grad_norm": 5.760273052146658e-05, "learning_rate": 0.00019805539912232784, "loss": 46.0, "step": 198 }, { "epoch": 0.027175582943566284, "grad_norm": 0.0001271786750294268, "learning_rate": 0.00019803472506723085, "loss": 46.0, "step": 199 }, { "epoch": 0.027312143661875662, "grad_norm": 0.00014666210336145014, "learning_rate": 0.00019801394278472418, "loss": 46.0, "step": 200 }, { "epoch": 0.02744870438018504, "grad_norm": 3.408119664527476e-05, "learning_rate": 0.0001979930522977509, "loss": 46.0, "step": 201 }, { "epoch": 0.02758526509849442, "grad_norm": 0.00010019735782407224, "learning_rate": 0.00019797205362937347, "loss": 46.0, "step": 202 }, { "epoch": 0.027721825816803797, "grad_norm": 0.00011540239211171865, "learning_rate": 0.00019795094680277378, "loss": 46.0, "step": 203 }, { "epoch": 0.027858386535113175, "grad_norm": 8.116603567032143e-05, "learning_rate": 0.0001979297318412532, "loss": 46.0, "step": 204 }, { "epoch": 0.027994947253422553, "grad_norm": 0.0001576711074449122, "learning_rate": 0.00019790840876823232, "loss": 46.0, "step": 205 }, { "epoch": 0.02813150797173193, "grad_norm": 8.809396240394562e-05, "learning_rate": 0.0001978869776072512, "loss": 46.0, "step": 206 }, { "epoch": 0.02826806869004131, "grad_norm": 0.0001056291293934919, "learning_rate": 0.00019786543838196924, "loss": 46.0, "step": 207 }, { "epoch": 0.028404629408350687, "grad_norm": 0.0001859702606452629, "learning_rate": 0.00019784379111616507, "loss": 46.0, "step": 208 }, { "epoch": 0.028541190126660065, "grad_norm": 7.50935505493544e-05, "learning_rate": 0.00019782203583373664, "loss": 46.0, "step": 209 }, { "epoch": 0.028677750844969443, "grad_norm": 4.688434273703024e-05, "learning_rate": 0.00019780017255870114, "loss": 46.0, "step": 210 }, { "epoch": 0.02881431156327882, "grad_norm": 0.00032884004758670926, "learning_rate": 0.00019777820131519495, "loss": 46.0, "step": 211 }, { "epoch": 0.0289508722815882, "grad_norm": 0.0001322666648775339, "learning_rate": 0.0001977561221274737, "loss": 46.0, "step": 212 }, { "epoch": 0.029087432999897578, "grad_norm": 0.00014480279060080647, "learning_rate": 0.00019773393501991212, "loss": 46.0, "step": 213 }, { "epoch": 0.02922399371820696, "grad_norm": 0.00011657484719762579, "learning_rate": 0.0001977116400170041, "loss": 46.0, "step": 214 }, { "epoch": 0.029360554436516337, "grad_norm": 0.00011347379040671512, "learning_rate": 0.00019768923714336272, "loss": 46.0, "step": 215 }, { "epoch": 0.029497115154825716, "grad_norm": 0.00017761997878551483, "learning_rate": 0.00019766672642372002, "loss": 46.0, "step": 216 }, { "epoch": 0.029633675873135094, "grad_norm": 0.00014384661335498095, "learning_rate": 0.00019764410788292722, "loss": 46.0, "step": 217 }, { "epoch": 0.029770236591444472, "grad_norm": 7.11917455191724e-05, "learning_rate": 0.00019762138154595446, "loss": 46.0, "step": 218 }, { "epoch": 0.02990679730975385, "grad_norm": 0.00012330934987403452, "learning_rate": 0.00019759854743789097, "loss": 46.0, "step": 219 }, { "epoch": 0.030043358028063228, "grad_norm": 8.032290497794747e-05, "learning_rate": 0.00019757560558394493, "loss": 46.0, "step": 220 }, { "epoch": 0.030179918746372606, "grad_norm": 0.00021323734836187214, "learning_rate": 0.0001975525560094434, "loss": 46.0, "step": 221 }, { "epoch": 0.030316479464681984, "grad_norm": 0.00014698925951961428, "learning_rate": 0.00019752939873983255, "loss": 46.0, "step": 222 }, { "epoch": 0.030453040182991362, "grad_norm": 0.00018497723795007914, "learning_rate": 0.00019750613380067718, "loss": 46.0, "step": 223 }, { "epoch": 0.03058960090130074, "grad_norm": 0.0001770486414898187, "learning_rate": 0.00019748276121766116, "loss": 46.0, "step": 224 }, { "epoch": 0.03072616161961012, "grad_norm": 6.994479190325364e-05, "learning_rate": 0.00019745928101658707, "loss": 46.0, "step": 225 }, { "epoch": 0.030862722337919497, "grad_norm": 0.0001687954500084743, "learning_rate": 0.00019743569322337642, "loss": 46.0, "step": 226 }, { "epoch": 0.030999283056228875, "grad_norm": 0.00010247869795421138, "learning_rate": 0.00019741199786406938, "loss": 46.0, "step": 227 }, { "epoch": 0.031135843774538253, "grad_norm": 0.00013670619227923453, "learning_rate": 0.00019738819496482494, "loss": 46.0, "step": 228 }, { "epoch": 0.031272404492847634, "grad_norm": 0.00013827405928168446, "learning_rate": 0.0001973642845519208, "loss": 46.0, "step": 229 }, { "epoch": 0.03140896521115701, "grad_norm": 0.00014026669668965042, "learning_rate": 0.00019734026665175334, "loss": 46.0, "step": 230 }, { "epoch": 0.03154552592946639, "grad_norm": 0.00010120788647327572, "learning_rate": 0.00019731614129083754, "loss": 46.0, "step": 231 }, { "epoch": 0.03168208664777577, "grad_norm": 0.00014757270400878042, "learning_rate": 0.0001972919084958072, "loss": 46.0, "step": 232 }, { "epoch": 0.03181864736608515, "grad_norm": 8.340936619788408e-05, "learning_rate": 0.00019726756829341446, "loss": 46.0, "step": 233 }, { "epoch": 0.031955208084394525, "grad_norm": 0.00013478368055075407, "learning_rate": 0.0001972431207105303, "loss": 46.0, "step": 234 }, { "epoch": 0.0320917688027039, "grad_norm": 0.00013591728929895908, "learning_rate": 0.00019721856577414407, "loss": 46.0, "step": 235 }, { "epoch": 0.03222832952101328, "grad_norm": 0.0002845980925485492, "learning_rate": 0.00019719390351136365, "loss": 46.0, "step": 236 }, { "epoch": 0.03236489023932266, "grad_norm": 0.00021167262457311153, "learning_rate": 0.0001971691339494155, "loss": 46.0, "step": 237 }, { "epoch": 0.03250145095763204, "grad_norm": 0.00015637895558029413, "learning_rate": 0.00019714425711564446, "loss": 46.0, "step": 238 }, { "epoch": 0.032638011675941415, "grad_norm": 0.0004328020440880209, "learning_rate": 0.00019711927303751382, "loss": 46.0, "step": 239 }, { "epoch": 0.032774572394250794, "grad_norm": 7.109026773832738e-05, "learning_rate": 0.0001970941817426052, "loss": 46.0, "step": 240 }, { "epoch": 0.03291113311256017, "grad_norm": 0.00022207674919627607, "learning_rate": 0.00019706898325861874, "loss": 46.0, "step": 241 }, { "epoch": 0.03304769383086955, "grad_norm": 7.223385910037905e-05, "learning_rate": 0.0001970436776133727, "loss": 46.0, "step": 242 }, { "epoch": 0.03318425454917893, "grad_norm": 0.0002725286176428199, "learning_rate": 0.00019701826483480388, "loss": 46.0, "step": 243 }, { "epoch": 0.033320815267488306, "grad_norm": 0.00011271587572991848, "learning_rate": 0.00019699274495096712, "loss": 46.0, "step": 244 }, { "epoch": 0.033457375985797684, "grad_norm": 0.0001713872916297987, "learning_rate": 0.0001969671179900357, "loss": 46.0, "step": 245 }, { "epoch": 0.03359393670410706, "grad_norm": 0.0001251544599654153, "learning_rate": 0.00019694138398030094, "loss": 46.0, "step": 246 }, { "epoch": 0.03373049742241644, "grad_norm": 0.00030473063816316426, "learning_rate": 0.00019691554295017246, "loss": 46.0, "step": 247 }, { "epoch": 0.03386705814072582, "grad_norm": 0.00020300064352340996, "learning_rate": 0.000196889594928178, "loss": 46.0, "step": 248 }, { "epoch": 0.034003618859035196, "grad_norm": 0.0001950880396179855, "learning_rate": 0.00019686353994296333, "loss": 46.0, "step": 249 }, { "epoch": 0.034140179577344575, "grad_norm": 0.00034574157325550914, "learning_rate": 0.00019683737802329244, "loss": 46.0, "step": 250 }, { "epoch": 0.03427674029565395, "grad_norm": 0.0001567787694511935, "learning_rate": 0.0001968111091980473, "loss": 46.0, "step": 251 }, { "epoch": 0.03441330101396333, "grad_norm": 0.00012498951400630176, "learning_rate": 0.00019678473349622793, "loss": 46.0, "step": 252 }, { "epoch": 0.03454986173227271, "grad_norm": 0.0002493293723091483, "learning_rate": 0.0001967582509469523, "loss": 46.0, "step": 253 }, { "epoch": 0.03468642245058209, "grad_norm": 0.0003348199534229934, "learning_rate": 0.0001967316615794563, "loss": 46.0, "step": 254 }, { "epoch": 0.034822983168891465, "grad_norm": 0.00020134066289756447, "learning_rate": 0.00019670496542309384, "loss": 46.0, "step": 255 }, { "epoch": 0.03495954388720084, "grad_norm": 8.074977085925639e-05, "learning_rate": 0.0001966781625073367, "loss": 46.0, "step": 256 }, { "epoch": 0.03509610460551023, "grad_norm": 0.00018040316354017705, "learning_rate": 0.00019665125286177449, "loss": 46.0, "step": 257 }, { "epoch": 0.035232665323819606, "grad_norm": 0.00016175376367755234, "learning_rate": 0.00019662423651611464, "loss": 46.0, "step": 258 }, { "epoch": 0.035369226042128984, "grad_norm": 6.748022133251652e-05, "learning_rate": 0.00019659711350018239, "loss": 46.0, "step": 259 }, { "epoch": 0.03550578676043836, "grad_norm": 0.00014227218343876302, "learning_rate": 0.00019656988384392075, "loss": 46.0, "step": 260 }, { "epoch": 0.03564234747874774, "grad_norm": 0.0002607603382784873, "learning_rate": 0.00019654254757739043, "loss": 46.0, "step": 261 }, { "epoch": 0.03577890819705712, "grad_norm": 0.00026646576588973403, "learning_rate": 0.00019651510473076987, "loss": 46.0, "step": 262 }, { "epoch": 0.0359154689153665, "grad_norm": 0.00020209423382766545, "learning_rate": 0.00019648755533435518, "loss": 46.0, "step": 263 }, { "epoch": 0.036052029633675875, "grad_norm": 0.00013208483869675547, "learning_rate": 0.00019645989941855999, "loss": 46.0, "step": 264 }, { "epoch": 0.03618859035198525, "grad_norm": 0.0002046150912065059, "learning_rate": 0.00019643213701391567, "loss": 46.0, "step": 265 }, { "epoch": 0.03632515107029463, "grad_norm": 0.00022820701997261494, "learning_rate": 0.00019640426815107108, "loss": 46.0, "step": 266 }, { "epoch": 0.03646171178860401, "grad_norm": 0.00019359646830707788, "learning_rate": 0.0001963762928607926, "loss": 46.0, "step": 267 }, { "epoch": 0.03659827250691339, "grad_norm": 0.00015012026415206492, "learning_rate": 0.0001963482111739641, "loss": 46.0, "step": 268 }, { "epoch": 0.036734833225222766, "grad_norm": 0.00021516659762710333, "learning_rate": 0.00019632002312158697, "loss": 46.0, "step": 269 }, { "epoch": 0.036871393943532144, "grad_norm": 0.00019021316256839782, "learning_rate": 0.00019629172873477995, "loss": 46.0, "step": 270 }, { "epoch": 0.03700795466184152, "grad_norm": 0.00023805341334082186, "learning_rate": 0.00019626332804477915, "loss": 46.0, "step": 271 }, { "epoch": 0.0371445153801509, "grad_norm": 0.0002716032904572785, "learning_rate": 0.0001962348210829382, "loss": 46.0, "step": 272 }, { "epoch": 0.03728107609846028, "grad_norm": 0.0001799121528165415, "learning_rate": 0.00019620620788072783, "loss": 46.0, "step": 273 }, { "epoch": 0.037417636816769656, "grad_norm": 0.00041423438233323395, "learning_rate": 0.0001961774884697362, "loss": 46.0, "step": 274 }, { "epoch": 0.037554197535079034, "grad_norm": 0.0002172417298424989, "learning_rate": 0.0001961486628816687, "loss": 46.0, "step": 275 }, { "epoch": 0.03769075825338841, "grad_norm": 0.0002554766833782196, "learning_rate": 0.0001961197311483479, "loss": 46.0, "step": 276 }, { "epoch": 0.03782731897169779, "grad_norm": 0.0002692249254323542, "learning_rate": 0.0001960906933017135, "loss": 46.0, "step": 277 }, { "epoch": 0.03796387969000717, "grad_norm": 0.00040551606798544526, "learning_rate": 0.00019606154937382256, "loss": 46.0, "step": 278 }, { "epoch": 0.03810044040831655, "grad_norm": 0.0003460289444774389, "learning_rate": 0.000196032299396849, "loss": 46.0, "step": 279 }, { "epoch": 0.038237001126625925, "grad_norm": 0.000246795651037246, "learning_rate": 0.00019600294340308398, "loss": 46.0, "step": 280 }, { "epoch": 0.0383735618449353, "grad_norm": 0.0005187865463085473, "learning_rate": 0.00019597348142493562, "loss": 46.0, "step": 281 }, { "epoch": 0.03851012256324468, "grad_norm": 0.0005177973653189838, "learning_rate": 0.00019594391349492902, "loss": 46.0, "step": 282 }, { "epoch": 0.03864668328155406, "grad_norm": 0.00031362145091407, "learning_rate": 0.00019591423964570632, "loss": 46.0, "step": 283 }, { "epoch": 0.03878324399986344, "grad_norm": 0.0003161428030580282, "learning_rate": 0.0001958844599100266, "loss": 46.0, "step": 284 }, { "epoch": 0.038919804718172815, "grad_norm": 0.0002518398978281766, "learning_rate": 0.00019585457432076578, "loss": 46.0, "step": 285 }, { "epoch": 0.03905636543648219, "grad_norm": 0.00017934896459337324, "learning_rate": 0.00019582458291091663, "loss": 46.0, "step": 286 }, { "epoch": 0.03919292615479157, "grad_norm": 0.00032432653824798763, "learning_rate": 0.0001957944857135888, "loss": 46.0, "step": 287 }, { "epoch": 0.03932948687310095, "grad_norm": 0.00021099011064507067, "learning_rate": 0.00019576428276200868, "loss": 46.0, "step": 288 }, { "epoch": 0.03946604759141033, "grad_norm": 0.0005440306267701089, "learning_rate": 0.00019573397408951943, "loss": 46.0, "step": 289 }, { "epoch": 0.039602608309719706, "grad_norm": 0.0005642606993205845, "learning_rate": 0.00019570355972958097, "loss": 46.0, "step": 290 }, { "epoch": 0.03973916902802909, "grad_norm": 0.0007911530556157231, "learning_rate": 0.00019567303971576976, "loss": 46.0, "step": 291 }, { "epoch": 0.03987572974633847, "grad_norm": 0.00017880380619317293, "learning_rate": 0.000195642414081779, "loss": 46.0, "step": 292 }, { "epoch": 0.04001229046464785, "grad_norm": 0.00048157072160393, "learning_rate": 0.00019561168286141856, "loss": 46.0, "step": 293 }, { "epoch": 0.040148851182957225, "grad_norm": 0.00010301580186933279, "learning_rate": 0.00019558084608861472, "loss": 46.0, "step": 294 }, { "epoch": 0.0402854119012666, "grad_norm": 0.0003147012903355062, "learning_rate": 0.00019554990379741033, "loss": 46.0, "step": 295 }, { "epoch": 0.04042197261957598, "grad_norm": 0.00039921182906255126, "learning_rate": 0.0001955188560219648, "loss": 46.0, "step": 296 }, { "epoch": 0.04055853333788536, "grad_norm": 0.0004582660039886832, "learning_rate": 0.00019548770279655397, "loss": 46.0, "step": 297 }, { "epoch": 0.04069509405619474, "grad_norm": 0.0001638657267903909, "learning_rate": 0.00019545644415557, "loss": 46.0, "step": 298 }, { "epoch": 0.040831654774504116, "grad_norm": 0.00041633055661804974, "learning_rate": 0.00019542508013352156, "loss": 46.0, "step": 299 }, { "epoch": 0.040968215492813494, "grad_norm": 0.0005134938983246684, "learning_rate": 0.0001953936107650336, "loss": 46.0, "step": 300 }, { "epoch": 0.040968215492813494, "eval_loss": 11.5, "eval_runtime": 20.4404, "eval_samples_per_second": 150.878, "eval_steps_per_second": 75.439, "step": 300 }, { "epoch": 0.04110477621112287, "grad_norm": 0.00037575350143015385, "learning_rate": 0.0001953620360848473, "loss": 46.0, "step": 301 }, { "epoch": 0.04124133692943225, "grad_norm": 0.00024376453075092286, "learning_rate": 0.00019533035612782017, "loss": 46.0, "step": 302 }, { "epoch": 0.04137789764774163, "grad_norm": 0.00043027085484936833, "learning_rate": 0.00019529857092892602, "loss": 46.0, "step": 303 }, { "epoch": 0.041514458366051006, "grad_norm": 0.00039237432065419853, "learning_rate": 0.00019526668052325467, "loss": 46.0, "step": 304 }, { "epoch": 0.041651019084360384, "grad_norm": 0.0001755830307956785, "learning_rate": 0.00019523468494601223, "loss": 46.0, "step": 305 }, { "epoch": 0.04178757980266976, "grad_norm": 0.0001863718789536506, "learning_rate": 0.00019520258423252082, "loss": 46.0, "step": 306 }, { "epoch": 0.04192414052097914, "grad_norm": 0.0003355692024342716, "learning_rate": 0.00019517037841821873, "loss": 46.0, "step": 307 }, { "epoch": 0.04206070123928852, "grad_norm": 0.0002629165828693658, "learning_rate": 0.00019513806753866016, "loss": 46.0, "step": 308 }, { "epoch": 0.0421972619575979, "grad_norm": 0.0004353369877208024, "learning_rate": 0.00019510565162951537, "loss": 46.0, "step": 309 }, { "epoch": 0.042333822675907275, "grad_norm": 0.0001725061738397926, "learning_rate": 0.00019507313072657055, "loss": 46.0, "step": 310 }, { "epoch": 0.04247038339421665, "grad_norm": 0.00021753301552962512, "learning_rate": 0.00019504050486572784, "loss": 46.0, "step": 311 }, { "epoch": 0.04260694411252603, "grad_norm": 0.00035614983062259853, "learning_rate": 0.00019500777408300519, "loss": 46.0, "step": 312 }, { "epoch": 0.04274350483083541, "grad_norm": 0.00025182642275467515, "learning_rate": 0.00019497493841453642, "loss": 46.0, "step": 313 }, { "epoch": 0.04288006554914479, "grad_norm": 0.0003710365854203701, "learning_rate": 0.0001949419978965711, "loss": 46.0, "step": 314 }, { "epoch": 0.043016626267454165, "grad_norm": 0.00031021906761452556, "learning_rate": 0.00019490895256547464, "loss": 46.0, "step": 315 }, { "epoch": 0.04315318698576354, "grad_norm": 0.0002598558203317225, "learning_rate": 0.000194875802457728, "loss": 46.0, "step": 316 }, { "epoch": 0.04328974770407292, "grad_norm": 0.00025477109011262655, "learning_rate": 0.000194842547609928, "loss": 46.0, "step": 317 }, { "epoch": 0.0434263084223823, "grad_norm": 0.0003966864896938205, "learning_rate": 0.00019480918805878697, "loss": 46.0, "step": 318 }, { "epoch": 0.04356286914069168, "grad_norm": 0.0001289808569708839, "learning_rate": 0.00019477572384113282, "loss": 46.0, "step": 319 }, { "epoch": 0.043699429859001056, "grad_norm": 0.00031020533060655, "learning_rate": 0.00019474215499390912, "loss": 46.0, "step": 320 }, { "epoch": 0.043835990577310434, "grad_norm": 0.00045745153329335153, "learning_rate": 0.0001947084815541748, "loss": 46.0, "step": 321 }, { "epoch": 0.04397255129561981, "grad_norm": 0.00031357730040326715, "learning_rate": 0.00019467470355910438, "loss": 46.0, "step": 322 }, { "epoch": 0.04410911201392919, "grad_norm": 0.00021334455232135952, "learning_rate": 0.00019464082104598776, "loss": 46.0, "step": 323 }, { "epoch": 0.04424567273223857, "grad_norm": 0.00033166687353514135, "learning_rate": 0.0001946068340522302, "loss": 46.0, "step": 324 }, { "epoch": 0.04438223345054795, "grad_norm": 0.0004193445493001491, "learning_rate": 0.00019457274261535236, "loss": 46.0, "step": 325 }, { "epoch": 0.04451879416885733, "grad_norm": 0.0005776687175966799, "learning_rate": 0.0001945385467729901, "loss": 46.0, "step": 326 }, { "epoch": 0.04465535488716671, "grad_norm": 0.00021739969088230282, "learning_rate": 0.00019450424656289466, "loss": 46.0, "step": 327 }, { "epoch": 0.04479191560547609, "grad_norm": 0.00025186152197420597, "learning_rate": 0.00019446984202293246, "loss": 46.0, "step": 328 }, { "epoch": 0.044928476323785466, "grad_norm": 0.0004120633821003139, "learning_rate": 0.00019443533319108504, "loss": 46.0, "step": 329 }, { "epoch": 0.045065037042094844, "grad_norm": 0.00028226643917150795, "learning_rate": 0.00019440072010544918, "loss": 46.0, "step": 330 }, { "epoch": 0.04520159776040422, "grad_norm": 0.00048744879313744605, "learning_rate": 0.00019436600280423665, "loss": 46.0, "step": 331 }, { "epoch": 0.0453381584787136, "grad_norm": 0.00019058110774494708, "learning_rate": 0.0001943311813257743, "loss": 46.0, "step": 332 }, { "epoch": 0.04547471919702298, "grad_norm": 0.00047706879558973014, "learning_rate": 0.00019429625570850404, "loss": 46.0, "step": 333 }, { "epoch": 0.045611279915332356, "grad_norm": 0.0005120193236507475, "learning_rate": 0.0001942612259909827, "loss": 46.0, "step": 334 }, { "epoch": 0.045747840633641734, "grad_norm": 0.0002128657652065158, "learning_rate": 0.00019422609221188207, "loss": 46.0, "step": 335 }, { "epoch": 0.04588440135195111, "grad_norm": 0.0005021935794502497, "learning_rate": 0.00019419085440998873, "loss": 46.0, "step": 336 }, { "epoch": 0.04602096207026049, "grad_norm": 0.000529598502907902, "learning_rate": 0.00019415551262420418, "loss": 46.0, "step": 337 }, { "epoch": 0.04615752278856987, "grad_norm": 0.000645966618321836, "learning_rate": 0.0001941200668935447, "loss": 46.0, "step": 338 }, { "epoch": 0.04629408350687925, "grad_norm": 0.0012579227332025766, "learning_rate": 0.00019408451725714136, "loss": 46.0, "step": 339 }, { "epoch": 0.046430644225188625, "grad_norm": 0.0003929708036594093, "learning_rate": 0.00019404886375423984, "loss": 46.0, "step": 340 }, { "epoch": 0.046567204943498, "grad_norm": 0.0008281446644105017, "learning_rate": 0.00019401310642420058, "loss": 46.0, "step": 341 }, { "epoch": 0.04670376566180738, "grad_norm": 0.0003685842384584248, "learning_rate": 0.00019397724530649857, "loss": 46.0, "step": 342 }, { "epoch": 0.04684032638011676, "grad_norm": 0.00041180921834893525, "learning_rate": 0.00019394128044072345, "loss": 46.0, "step": 343 }, { "epoch": 0.04697688709842614, "grad_norm": 0.00038018723716959357, "learning_rate": 0.00019390521186657934, "loss": 46.0, "step": 344 }, { "epoch": 0.047113447816735515, "grad_norm": 0.0006593601428903639, "learning_rate": 0.00019386903962388487, "loss": 46.0, "step": 345 }, { "epoch": 0.04725000853504489, "grad_norm": 0.0003346616867929697, "learning_rate": 0.0001938327637525731, "loss": 46.0, "step": 346 }, { "epoch": 0.04738656925335427, "grad_norm": 0.0006303668487817049, "learning_rate": 0.00019379638429269157, "loss": 46.0, "step": 347 }, { "epoch": 0.04752312997166365, "grad_norm": 0.0009288009605370462, "learning_rate": 0.00019375990128440204, "loss": 46.0, "step": 348 }, { "epoch": 0.04765969068997303, "grad_norm": 0.0009077245485968888, "learning_rate": 0.00019372331476798072, "loss": 46.0, "step": 349 }, { "epoch": 0.047796251408282406, "grad_norm": 0.0006878585554659367, "learning_rate": 0.00019368662478381799, "loss": 46.0, "step": 350 }, { "epoch": 0.047932812126591784, "grad_norm": 0.0005428345175459981, "learning_rate": 0.00019364983137241853, "loss": 46.0, "step": 351 }, { "epoch": 0.04806937284490116, "grad_norm": 0.0005264700739644468, "learning_rate": 0.0001936129345744011, "loss": 46.0, "step": 352 }, { "epoch": 0.04820593356321054, "grad_norm": 0.00034603691892698407, "learning_rate": 0.00019357593443049877, "loss": 46.0, "step": 353 }, { "epoch": 0.04834249428151992, "grad_norm": 0.0005551993381232023, "learning_rate": 0.00019353883098155854, "loss": 46.0, "step": 354 }, { "epoch": 0.048479054999829296, "grad_norm": 0.0005989357596263289, "learning_rate": 0.0001935016242685415, "loss": 46.0, "step": 355 }, { "epoch": 0.048615615718138674, "grad_norm": 0.0003730835160240531, "learning_rate": 0.00019346431433252276, "loss": 46.0, "step": 356 }, { "epoch": 0.04875217643644805, "grad_norm": 0.0006811637431383133, "learning_rate": 0.00019342690121469138, "loss": 46.0, "step": 357 }, { "epoch": 0.04888873715475743, "grad_norm": 0.000448873353889212, "learning_rate": 0.0001933893849563503, "loss": 46.0, "step": 358 }, { "epoch": 0.049025297873066816, "grad_norm": 0.00046228739665821195, "learning_rate": 0.0001933517655989164, "loss": 46.0, "step": 359 }, { "epoch": 0.049161858591376194, "grad_norm": 0.000341800187015906, "learning_rate": 0.00019331404318392027, "loss": 46.0, "step": 360 }, { "epoch": 0.04929841930968557, "grad_norm": 0.0007170014432631433, "learning_rate": 0.00019327621775300637, "loss": 46.0, "step": 361 }, { "epoch": 0.04943498002799495, "grad_norm": 0.0009279394871555269, "learning_rate": 0.00019323828934793286, "loss": 46.0, "step": 362 }, { "epoch": 0.04957154074630433, "grad_norm": 0.0007022646022960544, "learning_rate": 0.0001932002580105715, "loss": 46.0, "step": 363 }, { "epoch": 0.049708101464613706, "grad_norm": 0.0005995425744913518, "learning_rate": 0.0001931621237829078, "loss": 46.0, "step": 364 }, { "epoch": 0.049844662182923084, "grad_norm": 0.0006767901941202581, "learning_rate": 0.00019312388670704081, "loss": 46.0, "step": 365 }, { "epoch": 0.04998122290123246, "grad_norm": 0.00040845770854502916, "learning_rate": 0.00019308554682518313, "loss": 46.0, "step": 366 }, { "epoch": 0.05011778361954184, "grad_norm": 0.00040534368599765003, "learning_rate": 0.00019304710417966079, "loss": 46.0, "step": 367 }, { "epoch": 0.05025434433785122, "grad_norm": 0.00040592235745862126, "learning_rate": 0.0001930085588129134, "loss": 46.0, "step": 368 }, { "epoch": 0.0503909050561606, "grad_norm": 0.00048625541967339814, "learning_rate": 0.00019296991076749381, "loss": 46.0, "step": 369 }, { "epoch": 0.050527465774469975, "grad_norm": 0.0010227779857814312, "learning_rate": 0.00019293116008606837, "loss": 46.0, "step": 370 }, { "epoch": 0.05066402649277935, "grad_norm": 0.0005206182249821723, "learning_rate": 0.00019289230681141667, "loss": 46.0, "step": 371 }, { "epoch": 0.05080058721108873, "grad_norm": 0.0005789480055682361, "learning_rate": 0.00019285335098643153, "loss": 46.0, "step": 372 }, { "epoch": 0.05093714792939811, "grad_norm": 0.00046148046385496855, "learning_rate": 0.00019281429265411907, "loss": 46.0, "step": 373 }, { "epoch": 0.05107370864770749, "grad_norm": 0.0005185164045542479, "learning_rate": 0.00019277513185759844, "loss": 46.0, "step": 374 }, { "epoch": 0.051210269366016865, "grad_norm": 0.0005086156306788325, "learning_rate": 0.0001927358686401021, "loss": 46.0, "step": 375 }, { "epoch": 0.05134683008432624, "grad_norm": 0.0005038722883909941, "learning_rate": 0.0001926965030449754, "loss": 46.0, "step": 376 }, { "epoch": 0.05148339080263562, "grad_norm": 0.001132496865466237, "learning_rate": 0.00019265703511567677, "loss": 46.0, "step": 377 }, { "epoch": 0.051619951520945, "grad_norm": 0.0011954177170991898, "learning_rate": 0.00019261746489577765, "loss": 46.0, "step": 378 }, { "epoch": 0.05175651223925438, "grad_norm": 0.0006181654753163457, "learning_rate": 0.0001925777924289624, "loss": 46.0, "step": 379 }, { "epoch": 0.051893072957563756, "grad_norm": 0.0005807424895465374, "learning_rate": 0.00019253801775902824, "loss": 46.0, "step": 380 }, { "epoch": 0.052029633675873134, "grad_norm": 0.0007045441307127476, "learning_rate": 0.00019249814092988515, "loss": 46.0, "step": 381 }, { "epoch": 0.05216619439418251, "grad_norm": 0.0005537315737456083, "learning_rate": 0.00019245816198555605, "loss": 46.0, "step": 382 }, { "epoch": 0.05230275511249189, "grad_norm": 0.0006181203643791378, "learning_rate": 0.00019241808097017642, "loss": 46.0, "step": 383 }, { "epoch": 0.05243931583080127, "grad_norm": 0.0005752414581365883, "learning_rate": 0.00019237789792799458, "loss": 46.0, "step": 384 }, { "epoch": 0.052575876549110646, "grad_norm": 0.0012608635006472468, "learning_rate": 0.00019233761290337134, "loss": 46.0, "step": 385 }, { "epoch": 0.052712437267420025, "grad_norm": 0.001415454433299601, "learning_rate": 0.0001922972259407802, "loss": 46.0, "step": 386 }, { "epoch": 0.0528489979857294, "grad_norm": 0.001285754144191742, "learning_rate": 0.00019225673708480717, "loss": 46.0, "step": 387 }, { "epoch": 0.05298555870403878, "grad_norm": 0.0007671714411117136, "learning_rate": 0.00019221614638015075, "loss": 46.0, "step": 388 }, { "epoch": 0.05312211942234816, "grad_norm": 0.0009842630242928863, "learning_rate": 0.0001921754538716218, "loss": 46.0, "step": 389 }, { "epoch": 0.05325868014065754, "grad_norm": 0.0023981237318366766, "learning_rate": 0.00019213465960414368, "loss": 46.0, "step": 390 }, { "epoch": 0.053395240858966915, "grad_norm": 0.0003951654944103211, "learning_rate": 0.00019209376362275206, "loss": 46.0, "step": 391 }, { "epoch": 0.05353180157727629, "grad_norm": 0.0011894862400367856, "learning_rate": 0.00019205276597259485, "loss": 46.0, "step": 392 }, { "epoch": 0.05366836229558568, "grad_norm": 0.0006607277318835258, "learning_rate": 0.00019201166669893227, "loss": 46.0, "step": 393 }, { "epoch": 0.053804923013895056, "grad_norm": 0.0026427984703332186, "learning_rate": 0.00019197046584713663, "loss": 46.0, "step": 394 }, { "epoch": 0.053941483732204434, "grad_norm": 0.0005352182779461145, "learning_rate": 0.00019192916346269246, "loss": 46.0, "step": 395 }, { "epoch": 0.05407804445051381, "grad_norm": 0.0007665826706215739, "learning_rate": 0.00019188775959119643, "loss": 46.0, "step": 396 }, { "epoch": 0.05421460516882319, "grad_norm": 0.0006750879692845047, "learning_rate": 0.0001918462542783571, "loss": 46.0, "step": 397 }, { "epoch": 0.05435116588713257, "grad_norm": 0.0007010844419710338, "learning_rate": 0.0001918046475699951, "loss": 46.0, "step": 398 }, { "epoch": 0.05448772660544195, "grad_norm": 0.0007815820863470435, "learning_rate": 0.00019176293951204303, "loss": 46.0, "step": 399 }, { "epoch": 0.054624287323751325, "grad_norm": 0.0012023310409858823, "learning_rate": 0.00019172113015054532, "loss": 46.0, "step": 400 }, { "epoch": 0.0547608480420607, "grad_norm": 0.0008429251029156148, "learning_rate": 0.00019167921953165825, "loss": 46.0, "step": 401 }, { "epoch": 0.05489740876037008, "grad_norm": 0.0011086566373705864, "learning_rate": 0.00019163720770164991, "loss": 46.0, "step": 402 }, { "epoch": 0.05503396947867946, "grad_norm": 0.0006057535065338016, "learning_rate": 0.00019159509470690012, "loss": 46.0, "step": 403 }, { "epoch": 0.05517053019698884, "grad_norm": 0.0005650485400110483, "learning_rate": 0.0001915528805939003, "loss": 46.0, "step": 404 }, { "epoch": 0.055307090915298215, "grad_norm": 0.00042634617420844734, "learning_rate": 0.00019151056540925364, "loss": 46.0, "step": 405 }, { "epoch": 0.055443651633607594, "grad_norm": 0.0010889278491958976, "learning_rate": 0.0001914681491996748, "loss": 46.0, "step": 406 }, { "epoch": 0.05558021235191697, "grad_norm": 0.0005564565653912723, "learning_rate": 0.00019142563201199008, "loss": 46.0, "step": 407 }, { "epoch": 0.05571677307022635, "grad_norm": 0.0007656107773073018, "learning_rate": 0.0001913830138931371, "loss": 46.0, "step": 408 }, { "epoch": 0.05585333378853573, "grad_norm": 0.0008188536739908159, "learning_rate": 0.00019134029489016502, "loss": 46.0, "step": 409 }, { "epoch": 0.055989894506845106, "grad_norm": 0.0009277886711061001, "learning_rate": 0.00019129747505023436, "loss": 46.0, "step": 410 }, { "epoch": 0.056126455225154484, "grad_norm": 0.0008007950964383781, "learning_rate": 0.00019125455442061691, "loss": 46.0, "step": 411 }, { "epoch": 0.05626301594346386, "grad_norm": 0.000565837137401104, "learning_rate": 0.00019121153304869584, "loss": 46.0, "step": 412 }, { "epoch": 0.05639957666177324, "grad_norm": 0.0009090682142414153, "learning_rate": 0.00019116841098196536, "loss": 46.0, "step": 413 }, { "epoch": 0.05653613738008262, "grad_norm": 0.0010395282879471779, "learning_rate": 0.000191125188268031, "loss": 46.0, "step": 414 }, { "epoch": 0.056672698098391996, "grad_norm": 0.0004872768186032772, "learning_rate": 0.00019108186495460933, "loss": 46.0, "step": 415 }, { "epoch": 0.056809258816701375, "grad_norm": 0.0007016431773081422, "learning_rate": 0.00019103844108952803, "loss": 46.0, "step": 416 }, { "epoch": 0.05694581953501075, "grad_norm": 0.0005011600442230701, "learning_rate": 0.00019099491672072566, "loss": 46.0, "step": 417 }, { "epoch": 0.05708238025332013, "grad_norm": 0.0009304819977842271, "learning_rate": 0.00019095129189625193, "loss": 46.0, "step": 418 }, { "epoch": 0.05721894097162951, "grad_norm": 0.0009160469635389745, "learning_rate": 0.0001909075666642673, "loss": 46.0, "step": 419 }, { "epoch": 0.05735550168993889, "grad_norm": 0.001117922831326723, "learning_rate": 0.00019086374107304312, "loss": 46.0, "step": 420 }, { "epoch": 0.057492062408248265, "grad_norm": 0.0009813562501221895, "learning_rate": 0.00019081981517096152, "loss": 46.0, "step": 421 }, { "epoch": 0.05762862312655764, "grad_norm": 0.0006970268441364169, "learning_rate": 0.00019077578900651544, "loss": 46.0, "step": 422 }, { "epoch": 0.05776518384486702, "grad_norm": 0.0004595222999341786, "learning_rate": 0.00019073166262830834, "loss": 46.0, "step": 423 }, { "epoch": 0.0579017445631764, "grad_norm": 0.0009433355298824608, "learning_rate": 0.00019068743608505455, "loss": 46.0, "step": 424 }, { "epoch": 0.05803830528148578, "grad_norm": 0.001090813777409494, "learning_rate": 0.00019064310942557878, "loss": 46.0, "step": 425 }, { "epoch": 0.058174865999795156, "grad_norm": 0.00044983444968238473, "learning_rate": 0.0001905986826988164, "loss": 46.0, "step": 426 }, { "epoch": 0.05831142671810454, "grad_norm": 0.0008178472053259611, "learning_rate": 0.00019055415595381305, "loss": 46.0, "step": 427 }, { "epoch": 0.05844798743641392, "grad_norm": 0.0005727699608542025, "learning_rate": 0.0001905095292397251, "loss": 46.0, "step": 428 }, { "epoch": 0.0585845481547233, "grad_norm": 0.0013587451539933681, "learning_rate": 0.00019046480260581902, "loss": 46.0, "step": 429 }, { "epoch": 0.058721108873032675, "grad_norm": 0.0004678687546402216, "learning_rate": 0.00019041997610147167, "loss": 46.0, "step": 430 }, { "epoch": 0.05885766959134205, "grad_norm": 0.00045540923019871116, "learning_rate": 0.0001903750497761702, "loss": 46.0, "step": 431 }, { "epoch": 0.05899423030965143, "grad_norm": 0.0006064171902835369, "learning_rate": 0.00019033002367951194, "loss": 46.0, "step": 432 }, { "epoch": 0.05913079102796081, "grad_norm": 0.0002765974495559931, "learning_rate": 0.0001902848978612043, "loss": 46.0, "step": 433 }, { "epoch": 0.05926735174627019, "grad_norm": 0.000879188475664705, "learning_rate": 0.00019023967237106492, "loss": 46.0, "step": 434 }, { "epoch": 0.059403912464579565, "grad_norm": 0.0005185411428101361, "learning_rate": 0.00019019434725902137, "loss": 46.0, "step": 435 }, { "epoch": 0.059540473182888944, "grad_norm": 0.0008257310255430639, "learning_rate": 0.00019014892257511118, "loss": 46.0, "step": 436 }, { "epoch": 0.05967703390119832, "grad_norm": 0.0009346020524390042, "learning_rate": 0.00019010339836948186, "loss": 46.0, "step": 437 }, { "epoch": 0.0598135946195077, "grad_norm": 0.0010087540140375495, "learning_rate": 0.00019005777469239076, "loss": 46.0, "step": 438 }, { "epoch": 0.05995015533781708, "grad_norm": 0.0015897548291832209, "learning_rate": 0.00019001205159420513, "loss": 46.0, "step": 439 }, { "epoch": 0.060086716056126456, "grad_norm": 0.0007276605465449393, "learning_rate": 0.0001899662291254018, "loss": 46.0, "step": 440 }, { "epoch": 0.060223276774435834, "grad_norm": 0.0019181488314643502, "learning_rate": 0.00018992030733656746, "loss": 46.0, "step": 441 }, { "epoch": 0.06035983749274521, "grad_norm": 0.0009860562859103084, "learning_rate": 0.00018987428627839843, "loss": 46.0, "step": 442 }, { "epoch": 0.06049639821105459, "grad_norm": 0.0009881850564852357, "learning_rate": 0.0001898281660017005, "loss": 46.0, "step": 443 }, { "epoch": 0.06063295892936397, "grad_norm": 0.0016714398516342044, "learning_rate": 0.00018978194655738917, "loss": 46.0, "step": 444 }, { "epoch": 0.060769519647673347, "grad_norm": 0.000832723337225616, "learning_rate": 0.00018973562799648927, "loss": 46.0, "step": 445 }, { "epoch": 0.060906080365982725, "grad_norm": 0.0007889857515692711, "learning_rate": 0.00018968921037013512, "loss": 46.0, "step": 446 }, { "epoch": 0.0610426410842921, "grad_norm": 0.001141861779615283, "learning_rate": 0.00018964269372957038, "loss": 46.0, "step": 447 }, { "epoch": 0.06117920180260148, "grad_norm": 0.001163587556220591, "learning_rate": 0.00018959607812614807, "loss": 46.0, "step": 448 }, { "epoch": 0.06131576252091086, "grad_norm": 0.0006048906943760812, "learning_rate": 0.0001895493636113304, "loss": 46.0, "step": 449 }, { "epoch": 0.06145232323922024, "grad_norm": 0.0009424586896784604, "learning_rate": 0.00018950255023668876, "loss": 46.0, "step": 450 }, { "epoch": 0.061588883957529615, "grad_norm": 0.000706047285348177, "learning_rate": 0.00018945563805390381, "loss": 46.0, "step": 451 }, { "epoch": 0.06172544467583899, "grad_norm": 0.000651683600153774, "learning_rate": 0.00018940862711476513, "loss": 46.0, "step": 452 }, { "epoch": 0.06186200539414837, "grad_norm": 0.001372483093291521, "learning_rate": 0.00018936151747117141, "loss": 46.0, "step": 453 }, { "epoch": 0.06199856611245775, "grad_norm": 0.0011862257961183786, "learning_rate": 0.0001893143091751303, "loss": 46.0, "step": 454 }, { "epoch": 0.06213512683076713, "grad_norm": 0.0015178897883743048, "learning_rate": 0.00018926700227875833, "loss": 46.0, "step": 455 }, { "epoch": 0.062271687549076506, "grad_norm": 0.0008582820883020759, "learning_rate": 0.0001892195968342809, "loss": 46.0, "step": 456 }, { "epoch": 0.062408248267385884, "grad_norm": 0.000795324333012104, "learning_rate": 0.00018917209289403227, "loss": 46.0, "step": 457 }, { "epoch": 0.06254480898569527, "grad_norm": 0.001082447823137045, "learning_rate": 0.00018912449051045527, "loss": 46.0, "step": 458 }, { "epoch": 0.06268136970400465, "grad_norm": 0.001177507103420794, "learning_rate": 0.00018907678973610156, "loss": 46.0, "step": 459 }, { "epoch": 0.06281793042231403, "grad_norm": 0.0007584211998619139, "learning_rate": 0.00018902899062363143, "loss": 46.0, "step": 460 }, { "epoch": 0.0629544911406234, "grad_norm": 0.0005909963510930538, "learning_rate": 0.00018898109322581356, "loss": 46.0, "step": 461 }, { "epoch": 0.06309105185893278, "grad_norm": 0.0007048872066661716, "learning_rate": 0.0001889330975955253, "loss": 46.0, "step": 462 }, { "epoch": 0.06322761257724216, "grad_norm": 0.0005063001881353557, "learning_rate": 0.0001888850037857524, "loss": 46.0, "step": 463 }, { "epoch": 0.06336417329555154, "grad_norm": 0.000993978581391275, "learning_rate": 0.000188836811849589, "loss": 46.0, "step": 464 }, { "epoch": 0.06350073401386092, "grad_norm": 0.0010828451486304402, "learning_rate": 0.0001887885218402375, "loss": 46.0, "step": 465 }, { "epoch": 0.0636372947321703, "grad_norm": 0.0008432026370428503, "learning_rate": 0.00018874013381100875, "loss": 46.0, "step": 466 }, { "epoch": 0.06377385545047967, "grad_norm": 0.0007918166811577976, "learning_rate": 0.00018869164781532157, "loss": 46.0, "step": 467 }, { "epoch": 0.06391041616878905, "grad_norm": 0.0007451958954334259, "learning_rate": 0.00018864306390670307, "loss": 46.0, "step": 468 }, { "epoch": 0.06404697688709843, "grad_norm": 0.0007615243084728718, "learning_rate": 0.00018859438213878849, "loss": 46.0, "step": 469 }, { "epoch": 0.0641835376054078, "grad_norm": 0.00018180711776949465, "learning_rate": 0.000188545602565321, "loss": 46.0, "step": 470 }, { "epoch": 0.06432009832371718, "grad_norm": 0.0009969191160053015, "learning_rate": 0.0001884967252401518, "loss": 46.0, "step": 471 }, { "epoch": 0.06445665904202656, "grad_norm": 0.0008999738493002951, "learning_rate": 0.00018844775021724004, "loss": 46.0, "step": 472 }, { "epoch": 0.06459321976033594, "grad_norm": 0.0006670115399174392, "learning_rate": 0.00018839867755065265, "loss": 46.0, "step": 473 }, { "epoch": 0.06472978047864532, "grad_norm": 0.0011296794982627034, "learning_rate": 0.00018834950729456433, "loss": 46.0, "step": 474 }, { "epoch": 0.0648663411969547, "grad_norm": 0.0004901388892903924, "learning_rate": 0.0001883002395032577, "loss": 46.0, "step": 475 }, { "epoch": 0.06500290191526407, "grad_norm": 0.0017039328813552856, "learning_rate": 0.00018825087423112282, "loss": 46.0, "step": 476 }, { "epoch": 0.06513946263357345, "grad_norm": 0.0009402755531482399, "learning_rate": 0.00018820141153265754, "loss": 46.0, "step": 477 }, { "epoch": 0.06527602335188283, "grad_norm": 0.0008238424779847264, "learning_rate": 0.00018815185146246716, "loss": 46.0, "step": 478 }, { "epoch": 0.06541258407019221, "grad_norm": 0.0007881290512159467, "learning_rate": 0.00018810219407526456, "loss": 46.0, "step": 479 }, { "epoch": 0.06554914478850159, "grad_norm": 0.0010467070387676358, "learning_rate": 0.00018805243942587, "loss": 46.0, "step": 480 }, { "epoch": 0.06568570550681097, "grad_norm": 0.0009283073595725, "learning_rate": 0.0001880025875692111, "loss": 46.0, "step": 481 }, { "epoch": 0.06582226622512034, "grad_norm": 0.000698375515639782, "learning_rate": 0.00018795263856032288, "loss": 46.0, "step": 482 }, { "epoch": 0.06595882694342972, "grad_norm": 0.0015646845567971468, "learning_rate": 0.00018790259245434748, "loss": 46.0, "step": 483 }, { "epoch": 0.0660953876617391, "grad_norm": 0.0008225208730436862, "learning_rate": 0.00018785244930653438, "loss": 46.0, "step": 484 }, { "epoch": 0.06623194838004848, "grad_norm": 0.0006926036439836025, "learning_rate": 0.00018780220917224006, "loss": 46.0, "step": 485 }, { "epoch": 0.06636850909835786, "grad_norm": 0.000991306733340025, "learning_rate": 0.00018775187210692815, "loss": 46.0, "step": 486 }, { "epoch": 0.06650506981666723, "grad_norm": 0.0006662954692728817, "learning_rate": 0.00018770143816616928, "loss": 46.0, "step": 487 }, { "epoch": 0.06664163053497661, "grad_norm": 0.000875332101713866, "learning_rate": 0.000187650907405641, "loss": 46.0, "step": 488 }, { "epoch": 0.06677819125328599, "grad_norm": 0.002434828784316778, "learning_rate": 0.00018760027988112775, "loss": 46.0, "step": 489 }, { "epoch": 0.06691475197159537, "grad_norm": 0.002048447262495756, "learning_rate": 0.0001875495556485208, "loss": 46.0, "step": 490 }, { "epoch": 0.06705131268990475, "grad_norm": 0.0005600312724709511, "learning_rate": 0.00018749873476381828, "loss": 46.0, "step": 491 }, { "epoch": 0.06718787340821412, "grad_norm": 0.00041424104711040854, "learning_rate": 0.00018744781728312479, "loss": 46.0, "step": 492 }, { "epoch": 0.0673244341265235, "grad_norm": 0.0009647294646129012, "learning_rate": 0.0001873968032626518, "loss": 46.0, "step": 493 }, { "epoch": 0.06746099484483288, "grad_norm": 0.0007067355909384787, "learning_rate": 0.00018734569275871726, "loss": 46.0, "step": 494 }, { "epoch": 0.06759755556314226, "grad_norm": 0.000527632946614176, "learning_rate": 0.00018729448582774559, "loss": 46.0, "step": 495 }, { "epoch": 0.06773411628145164, "grad_norm": 0.000945943349506706, "learning_rate": 0.00018724318252626775, "loss": 46.0, "step": 496 }, { "epoch": 0.06787067699976101, "grad_norm": 0.0016438892344012856, "learning_rate": 0.00018719178291092106, "loss": 46.0, "step": 497 }, { "epoch": 0.06800723771807039, "grad_norm": 0.0018535932758823037, "learning_rate": 0.00018714028703844914, "loss": 46.0, "step": 498 }, { "epoch": 0.06814379843637977, "grad_norm": 0.0012148134410381317, "learning_rate": 0.00018708869496570192, "loss": 46.0, "step": 499 }, { "epoch": 0.06828035915468915, "grad_norm": 0.0008952637435868382, "learning_rate": 0.00018703700674963547, "loss": 46.0, "step": 500 }, { "epoch": 0.06841691987299853, "grad_norm": 0.000657785392832011, "learning_rate": 0.0001869852224473121, "loss": 46.0, "step": 501 }, { "epoch": 0.0685534805913079, "grad_norm": 0.0008411743910983205, "learning_rate": 0.00018693334211590006, "loss": 46.0, "step": 502 }, { "epoch": 0.06869004130961728, "grad_norm": 0.0008810686995275319, "learning_rate": 0.00018688136581267373, "loss": 46.0, "step": 503 }, { "epoch": 0.06882660202792666, "grad_norm": 0.0005853201728314161, "learning_rate": 0.00018682929359501338, "loss": 46.0, "step": 504 }, { "epoch": 0.06896316274623604, "grad_norm": 0.0004718601703643799, "learning_rate": 0.00018677712552040522, "loss": 46.0, "step": 505 }, { "epoch": 0.06909972346454542, "grad_norm": 0.0007315895054489374, "learning_rate": 0.00018672486164644116, "loss": 46.0, "step": 506 }, { "epoch": 0.0692362841828548, "grad_norm": 0.0009641025681048632, "learning_rate": 0.00018667250203081905, "loss": 46.0, "step": 507 }, { "epoch": 0.06937284490116417, "grad_norm": 0.0008396725752390921, "learning_rate": 0.00018662004673134232, "loss": 46.0, "step": 508 }, { "epoch": 0.06950940561947355, "grad_norm": 0.000584542634896934, "learning_rate": 0.00018656749580592003, "loss": 46.0, "step": 509 }, { "epoch": 0.06964596633778293, "grad_norm": 0.0005784454988315701, "learning_rate": 0.00018651484931256685, "loss": 46.0, "step": 510 }, { "epoch": 0.06978252705609231, "grad_norm": 0.0003768317692447454, "learning_rate": 0.00018646210730940293, "loss": 46.0, "step": 511 }, { "epoch": 0.06991908777440169, "grad_norm": 0.0012037859996780753, "learning_rate": 0.00018640926985465388, "loss": 46.0, "step": 512 }, { "epoch": 0.07005564849271108, "grad_norm": 0.0007435141014866531, "learning_rate": 0.00018635633700665063, "loss": 46.0, "step": 513 }, { "epoch": 0.07019220921102046, "grad_norm": 0.0009005676256492734, "learning_rate": 0.00018630330882382951, "loss": 46.0, "step": 514 }, { "epoch": 0.07032876992932983, "grad_norm": 0.0005016371724195778, "learning_rate": 0.00018625018536473206, "loss": 46.0, "step": 515 }, { "epoch": 0.07046533064763921, "grad_norm": 0.0012827449245378375, "learning_rate": 0.00018619696668800492, "loss": 46.0, "step": 516 }, { "epoch": 0.07060189136594859, "grad_norm": 0.0004301304288674146, "learning_rate": 0.0001861436528524, "loss": 46.0, "step": 517 }, { "epoch": 0.07073845208425797, "grad_norm": 0.00036730722058564425, "learning_rate": 0.00018609024391677418, "loss": 46.0, "step": 518 }, { "epoch": 0.07087501280256735, "grad_norm": 0.0008029394666664302, "learning_rate": 0.00018603673994008925, "loss": 46.0, "step": 519 }, { "epoch": 0.07101157352087673, "grad_norm": 0.0006513711996376514, "learning_rate": 0.00018598314098141206, "loss": 46.0, "step": 520 }, { "epoch": 0.0711481342391861, "grad_norm": 0.00077268440509215, "learning_rate": 0.00018592944709991426, "loss": 46.0, "step": 521 }, { "epoch": 0.07128469495749548, "grad_norm": 0.0004973417380824685, "learning_rate": 0.00018587565835487233, "loss": 46.0, "step": 522 }, { "epoch": 0.07142125567580486, "grad_norm": 0.000682865793351084, "learning_rate": 0.00018582177480566735, "loss": 46.0, "step": 523 }, { "epoch": 0.07155781639411424, "grad_norm": 0.0006797179230488837, "learning_rate": 0.00018576779651178522, "loss": 46.0, "step": 524 }, { "epoch": 0.07169437711242362, "grad_norm": 0.000731765350792557, "learning_rate": 0.00018571372353281632, "loss": 46.0, "step": 525 }, { "epoch": 0.071830937830733, "grad_norm": 0.00029318922315724194, "learning_rate": 0.00018565955592845563, "loss": 46.0, "step": 526 }, { "epoch": 0.07196749854904237, "grad_norm": 0.0011708553647622466, "learning_rate": 0.00018560529375850259, "loss": 46.0, "step": 527 }, { "epoch": 0.07210405926735175, "grad_norm": 0.00033764285035431385, "learning_rate": 0.00018555093708286094, "loss": 46.0, "step": 528 }, { "epoch": 0.07224061998566113, "grad_norm": 0.0011742584174498916, "learning_rate": 0.00018549648596153891, "loss": 46.0, "step": 529 }, { "epoch": 0.0723771807039705, "grad_norm": 0.0011071540648117661, "learning_rate": 0.00018544194045464886, "loss": 46.0, "step": 530 }, { "epoch": 0.07251374142227988, "grad_norm": 0.0004732667875941843, "learning_rate": 0.00018538730062240744, "loss": 46.0, "step": 531 }, { "epoch": 0.07265030214058926, "grad_norm": 0.0006753505440428853, "learning_rate": 0.00018533256652513536, "loss": 46.0, "step": 532 }, { "epoch": 0.07278686285889864, "grad_norm": 0.0013869997346773744, "learning_rate": 0.00018527773822325742, "loss": 46.0, "step": 533 }, { "epoch": 0.07292342357720802, "grad_norm": 0.0004685988824348897, "learning_rate": 0.0001852228157773025, "loss": 46.0, "step": 534 }, { "epoch": 0.0730599842955174, "grad_norm": 0.0005608046194538474, "learning_rate": 0.00018516779924790324, "loss": 46.0, "step": 535 }, { "epoch": 0.07319654501382677, "grad_norm": 0.0007237467798404396, "learning_rate": 0.00018511268869579635, "loss": 46.0, "step": 536 }, { "epoch": 0.07333310573213615, "grad_norm": 0.0007371974061243236, "learning_rate": 0.00018505748418182213, "loss": 46.0, "step": 537 }, { "epoch": 0.07346966645044553, "grad_norm": 0.0021626208908855915, "learning_rate": 0.0001850021857669248, "loss": 46.0, "step": 538 }, { "epoch": 0.07360622716875491, "grad_norm": 0.0044371578842401505, "learning_rate": 0.0001849467935121521, "loss": 46.0, "step": 539 }, { "epoch": 0.07374278788706429, "grad_norm": 0.001232149894349277, "learning_rate": 0.00018489130747865548, "loss": 46.0, "step": 540 }, { "epoch": 0.07387934860537367, "grad_norm": 0.0007343983743339777, "learning_rate": 0.00018483572772768982, "loss": 46.0, "step": 541 }, { "epoch": 0.07401590932368304, "grad_norm": 0.00047310179797932506, "learning_rate": 0.00018478005432061352, "loss": 46.0, "step": 542 }, { "epoch": 0.07415247004199242, "grad_norm": 0.0008101825369521976, "learning_rate": 0.00018472428731888837, "loss": 46.0, "step": 543 }, { "epoch": 0.0742890307603018, "grad_norm": 0.001507743145339191, "learning_rate": 0.00018466842678407946, "loss": 46.0, "step": 544 }, { "epoch": 0.07442559147861118, "grad_norm": 0.0009909087093546987, "learning_rate": 0.00018461247277785513, "loss": 46.0, "step": 545 }, { "epoch": 0.07456215219692056, "grad_norm": 0.0013652259949594736, "learning_rate": 0.000184556425361987, "loss": 46.0, "step": 546 }, { "epoch": 0.07469871291522993, "grad_norm": 0.0008130900096148252, "learning_rate": 0.0001845002845983497, "loss": 46.0, "step": 547 }, { "epoch": 0.07483527363353931, "grad_norm": 0.0012740385718643665, "learning_rate": 0.00018444405054892092, "loss": 46.0, "step": 548 }, { "epoch": 0.07497183435184869, "grad_norm": 0.000746730831451714, "learning_rate": 0.0001843877232757815, "loss": 46.0, "step": 549 }, { "epoch": 0.07510839507015807, "grad_norm": 0.0019112954614683986, "learning_rate": 0.0001843313028411149, "loss": 46.0, "step": 550 }, { "epoch": 0.07524495578846745, "grad_norm": 0.0005015329807065427, "learning_rate": 0.0001842747893072077, "loss": 46.0, "step": 551 }, { "epoch": 0.07538151650677682, "grad_norm": 0.0004962061066180468, "learning_rate": 0.00018421818273644912, "loss": 46.0, "step": 552 }, { "epoch": 0.0755180772250862, "grad_norm": 0.00037836559931747615, "learning_rate": 0.00018416148319133117, "loss": 46.0, "step": 553 }, { "epoch": 0.07565463794339558, "grad_norm": 0.0003992395068053156, "learning_rate": 0.0001841046907344484, "loss": 46.0, "step": 554 }, { "epoch": 0.07579119866170496, "grad_norm": 0.0004277300031390041, "learning_rate": 0.000184047805428498, "loss": 46.0, "step": 555 }, { "epoch": 0.07592775938001434, "grad_norm": 0.0005832565948367119, "learning_rate": 0.00018399082733627965, "loss": 46.0, "step": 556 }, { "epoch": 0.07606432009832371, "grad_norm": 0.0006913339020684361, "learning_rate": 0.00018393375652069548, "loss": 46.0, "step": 557 }, { "epoch": 0.0762008808166331, "grad_norm": 0.00086840835865587, "learning_rate": 0.00018387659304474994, "loss": 46.0, "step": 558 }, { "epoch": 0.07633744153494247, "grad_norm": 0.0007002664497122169, "learning_rate": 0.00018381933697154986, "loss": 46.0, "step": 559 }, { "epoch": 0.07647400225325185, "grad_norm": 0.0008870341116562486, "learning_rate": 0.00018376198836430417, "loss": 46.0, "step": 560 }, { "epoch": 0.07661056297156123, "grad_norm": 0.0006294609629549086, "learning_rate": 0.00018370454728632404, "loss": 46.0, "step": 561 }, { "epoch": 0.0767471236898706, "grad_norm": 0.0004686205938924104, "learning_rate": 0.00018364701380102266, "loss": 46.0, "step": 562 }, { "epoch": 0.07688368440817998, "grad_norm": 0.0008394161704927683, "learning_rate": 0.0001835893879719154, "loss": 46.0, "step": 563 }, { "epoch": 0.07702024512648936, "grad_norm": 0.0006610259879380465, "learning_rate": 0.00018353166986261935, "loss": 46.0, "step": 564 }, { "epoch": 0.07715680584479874, "grad_norm": 0.000717335322406143, "learning_rate": 0.0001834738595368536, "loss": 46.0, "step": 565 }, { "epoch": 0.07729336656310812, "grad_norm": 0.0005708184908144176, "learning_rate": 0.00018341595705843907, "loss": 46.0, "step": 566 }, { "epoch": 0.0774299272814175, "grad_norm": 0.0009451278601773083, "learning_rate": 0.00018335796249129832, "loss": 46.0, "step": 567 }, { "epoch": 0.07756648799972687, "grad_norm": 0.0005919419345445931, "learning_rate": 0.0001832998758994556, "loss": 46.0, "step": 568 }, { "epoch": 0.07770304871803625, "grad_norm": 0.0006907092756591737, "learning_rate": 0.00018324169734703683, "loss": 46.0, "step": 569 }, { "epoch": 0.07783960943634563, "grad_norm": 0.0008807244012132287, "learning_rate": 0.00018318342689826938, "loss": 46.0, "step": 570 }, { "epoch": 0.07797617015465501, "grad_norm": 0.0006545766955241561, "learning_rate": 0.00018312506461748207, "loss": 46.0, "step": 571 }, { "epoch": 0.07811273087296439, "grad_norm": 0.00029055686900392175, "learning_rate": 0.0001830666105691051, "loss": 46.0, "step": 572 }, { "epoch": 0.07824929159127376, "grad_norm": 0.000587637594435364, "learning_rate": 0.00018300806481767005, "loss": 46.0, "step": 573 }, { "epoch": 0.07838585230958314, "grad_norm": 0.0003420762368477881, "learning_rate": 0.00018294942742780966, "loss": 46.0, "step": 574 }, { "epoch": 0.07852241302789252, "grad_norm": 0.00046790859778411686, "learning_rate": 0.00018289069846425782, "loss": 46.0, "step": 575 }, { "epoch": 0.0786589737462019, "grad_norm": 0.00026647665072232485, "learning_rate": 0.00018283187799184958, "loss": 46.0, "step": 576 }, { "epoch": 0.07879553446451128, "grad_norm": 0.0015470724320039153, "learning_rate": 0.000182772966075521, "loss": 46.0, "step": 577 }, { "epoch": 0.07893209518282066, "grad_norm": 0.00032806419767439365, "learning_rate": 0.00018271396278030905, "loss": 46.0, "step": 578 }, { "epoch": 0.07906865590113003, "grad_norm": 0.0004820665344595909, "learning_rate": 0.0001826548681713516, "loss": 46.0, "step": 579 }, { "epoch": 0.07920521661943941, "grad_norm": 0.0006422748556360602, "learning_rate": 0.00018259568231388738, "loss": 46.0, "step": 580 }, { "epoch": 0.0793417773377488, "grad_norm": 0.0002866037539206445, "learning_rate": 0.00018253640527325578, "loss": 46.0, "step": 581 }, { "epoch": 0.07947833805605818, "grad_norm": 0.0005615049158222973, "learning_rate": 0.00018247703711489686, "loss": 46.0, "step": 582 }, { "epoch": 0.07961489877436756, "grad_norm": 0.001207325840368867, "learning_rate": 0.00018241757790435134, "loss": 46.0, "step": 583 }, { "epoch": 0.07975145949267694, "grad_norm": 0.0005774226738139987, "learning_rate": 0.00018235802770726037, "loss": 46.0, "step": 584 }, { "epoch": 0.07988802021098632, "grad_norm": 0.0005512305651791394, "learning_rate": 0.00018229838658936564, "loss": 46.0, "step": 585 }, { "epoch": 0.0800245809292957, "grad_norm": 0.0021268404088914394, "learning_rate": 0.00018223865461650912, "loss": 46.0, "step": 586 }, { "epoch": 0.08016114164760507, "grad_norm": 0.0017917539225891232, "learning_rate": 0.00018217883185463315, "loss": 46.0, "step": 587 }, { "epoch": 0.08029770236591445, "grad_norm": 0.004704636055976152, "learning_rate": 0.00018211891836978028, "loss": 46.0, "step": 588 }, { "epoch": 0.08043426308422383, "grad_norm": 0.0016004083445295691, "learning_rate": 0.00018205891422809316, "loss": 46.0, "step": 589 }, { "epoch": 0.0805708238025332, "grad_norm": 0.0008631657692603767, "learning_rate": 0.0001819988194958146, "loss": 46.0, "step": 590 }, { "epoch": 0.08070738452084258, "grad_norm": 0.000875883677508682, "learning_rate": 0.00018193863423928742, "loss": 46.0, "step": 591 }, { "epoch": 0.08084394523915196, "grad_norm": 0.0007616846705786884, "learning_rate": 0.0001818783585249543, "loss": 46.0, "step": 592 }, { "epoch": 0.08098050595746134, "grad_norm": 0.0017507924931123853, "learning_rate": 0.00018181799241935787, "loss": 46.0, "step": 593 }, { "epoch": 0.08111706667577072, "grad_norm": 0.0015062005259096622, "learning_rate": 0.00018175753598914047, "loss": 46.0, "step": 594 }, { "epoch": 0.0812536273940801, "grad_norm": 0.0009979240130633116, "learning_rate": 0.0001816969893010442, "loss": 46.0, "step": 595 }, { "epoch": 0.08139018811238947, "grad_norm": 0.0014799052150920033, "learning_rate": 0.00018163635242191085, "loss": 46.0, "step": 596 }, { "epoch": 0.08152674883069885, "grad_norm": 0.002407307270914316, "learning_rate": 0.00018157562541868164, "loss": 46.0, "step": 597 }, { "epoch": 0.08166330954900823, "grad_norm": 0.001063771778717637, "learning_rate": 0.00018151480835839741, "loss": 46.0, "step": 598 }, { "epoch": 0.08179987026731761, "grad_norm": 0.00132046639919281, "learning_rate": 0.0001814539013081984, "loss": 46.0, "step": 599 }, { "epoch": 0.08193643098562699, "grad_norm": 0.001031374093145132, "learning_rate": 0.00018139290433532416, "loss": 46.0, "step": 600 }, { "epoch": 0.08193643098562699, "eval_loss": 11.5, "eval_runtime": 20.7447, "eval_samples_per_second": 148.665, "eval_steps_per_second": 74.332, "step": 600 }, { "epoch": 0.08207299170393637, "grad_norm": 0.0006209348211996257, "learning_rate": 0.00018133181750711348, "loss": 46.0, "step": 601 }, { "epoch": 0.08220955242224574, "grad_norm": 0.0009648207924328744, "learning_rate": 0.00018127064089100447, "loss": 46.0, "step": 602 }, { "epoch": 0.08234611314055512, "grad_norm": 0.0010155495256185532, "learning_rate": 0.00018120937455453424, "loss": 46.0, "step": 603 }, { "epoch": 0.0824826738588645, "grad_norm": 0.0008265697979368269, "learning_rate": 0.000181148018565339, "loss": 46.0, "step": 604 }, { "epoch": 0.08261923457717388, "grad_norm": 0.0006557560409419239, "learning_rate": 0.00018108657299115396, "loss": 46.0, "step": 605 }, { "epoch": 0.08275579529548326, "grad_norm": 0.0003027521597687155, "learning_rate": 0.0001810250378998132, "loss": 46.0, "step": 606 }, { "epoch": 0.08289235601379263, "grad_norm": 0.00038604583824053407, "learning_rate": 0.00018096341335924955, "loss": 46.0, "step": 607 }, { "epoch": 0.08302891673210201, "grad_norm": 0.0011505828006193042, "learning_rate": 0.00018090169943749476, "loss": 46.0, "step": 608 }, { "epoch": 0.08316547745041139, "grad_norm": 0.00021457420371007174, "learning_rate": 0.00018083989620267907, "loss": 46.0, "step": 609 }, { "epoch": 0.08330203816872077, "grad_norm": 0.000474292115541175, "learning_rate": 0.0001807780037230315, "loss": 46.0, "step": 610 }, { "epoch": 0.08343859888703015, "grad_norm": 0.00048646898358128965, "learning_rate": 0.00018071602206687942, "loss": 46.0, "step": 611 }, { "epoch": 0.08357515960533952, "grad_norm": 0.0005354878958314657, "learning_rate": 0.00018065395130264876, "loss": 46.0, "step": 612 }, { "epoch": 0.0837117203236489, "grad_norm": 0.00048499630065634847, "learning_rate": 0.00018059179149886378, "loss": 46.0, "step": 613 }, { "epoch": 0.08384828104195828, "grad_norm": 0.0008654086268506944, "learning_rate": 0.00018052954272414706, "loss": 46.0, "step": 614 }, { "epoch": 0.08398484176026766, "grad_norm": 0.0005253584822639823, "learning_rate": 0.0001804672050472194, "loss": 46.0, "step": 615 }, { "epoch": 0.08412140247857704, "grad_norm": 0.0017661650199443102, "learning_rate": 0.0001804047785368997, "loss": 46.0, "step": 616 }, { "epoch": 0.08425796319688642, "grad_norm": 0.0003864025929942727, "learning_rate": 0.00018034226326210496, "loss": 46.0, "step": 617 }, { "epoch": 0.0843945239151958, "grad_norm": 0.000856466474942863, "learning_rate": 0.00018027965929185024, "loss": 46.0, "step": 618 }, { "epoch": 0.08453108463350517, "grad_norm": 0.0004072580486536026, "learning_rate": 0.00018021696669524842, "loss": 46.0, "step": 619 }, { "epoch": 0.08466764535181455, "grad_norm": 0.0010099663631990552, "learning_rate": 0.0001801541855415102, "loss": 46.0, "step": 620 }, { "epoch": 0.08480420607012393, "grad_norm": 0.001016309019178152, "learning_rate": 0.00018009131589994418, "loss": 46.0, "step": 621 }, { "epoch": 0.0849407667884333, "grad_norm": 0.001154162921011448, "learning_rate": 0.00018002835783995652, "loss": 46.0, "step": 622 }, { "epoch": 0.08507732750674268, "grad_norm": 0.0006390147027559578, "learning_rate": 0.00017996531143105106, "loss": 46.0, "step": 623 }, { "epoch": 0.08521388822505206, "grad_norm": 0.0012196438619866967, "learning_rate": 0.00017990217674282915, "loss": 46.0, "step": 624 }, { "epoch": 0.08535044894336144, "grad_norm": 0.0003661528753582388, "learning_rate": 0.0001798389538449896, "loss": 46.0, "step": 625 }, { "epoch": 0.08548700966167082, "grad_norm": 0.0006461184821091592, "learning_rate": 0.0001797756428073286, "loss": 46.0, "step": 626 }, { "epoch": 0.0856235703799802, "grad_norm": 0.0010678149992600083, "learning_rate": 0.00017971224369973964, "loss": 46.0, "step": 627 }, { "epoch": 0.08576013109828957, "grad_norm": 0.0006297577056102455, "learning_rate": 0.00017964875659221344, "loss": 46.0, "step": 628 }, { "epoch": 0.08589669181659895, "grad_norm": 0.000396767514757812, "learning_rate": 0.00017958518155483788, "loss": 46.0, "step": 629 }, { "epoch": 0.08603325253490833, "grad_norm": 0.0011105970479547977, "learning_rate": 0.00017952151865779792, "loss": 46.0, "step": 630 }, { "epoch": 0.08616981325321771, "grad_norm": 0.0012717852368950844, "learning_rate": 0.00017945776797137543, "loss": 46.0, "step": 631 }, { "epoch": 0.08630637397152709, "grad_norm": 0.0007417348097078502, "learning_rate": 0.00017939392956594933, "loss": 46.0, "step": 632 }, { "epoch": 0.08644293468983646, "grad_norm": 0.00036522196023724973, "learning_rate": 0.00017933000351199533, "loss": 46.0, "step": 633 }, { "epoch": 0.08657949540814584, "grad_norm": 0.0013521965593099594, "learning_rate": 0.00017926598988008582, "loss": 46.0, "step": 634 }, { "epoch": 0.08671605612645522, "grad_norm": 0.0007052735309116542, "learning_rate": 0.00017920188874089, "loss": 46.0, "step": 635 }, { "epoch": 0.0868526168447646, "grad_norm": 0.0010762631427496672, "learning_rate": 0.00017913770016517354, "loss": 46.0, "step": 636 }, { "epoch": 0.08698917756307398, "grad_norm": 0.002226311946287751, "learning_rate": 0.0001790734242237988, "loss": 46.0, "step": 637 }, { "epoch": 0.08712573828138336, "grad_norm": 0.004666191525757313, "learning_rate": 0.00017900906098772444, "loss": 46.0, "step": 638 }, { "epoch": 0.08726229899969273, "grad_norm": 0.0010003356728702784, "learning_rate": 0.00017894461052800557, "loss": 46.0, "step": 639 }, { "epoch": 0.08739885971800211, "grad_norm": 0.0011451984755694866, "learning_rate": 0.00017888007291579357, "loss": 46.0, "step": 640 }, { "epoch": 0.08753542043631149, "grad_norm": 0.0011838976060971618, "learning_rate": 0.000178815448222336, "loss": 46.0, "step": 641 }, { "epoch": 0.08767198115462087, "grad_norm": 0.0011408502468839288, "learning_rate": 0.0001787507365189767, "loss": 46.0, "step": 642 }, { "epoch": 0.08780854187293025, "grad_norm": 0.0007302387966774404, "learning_rate": 0.00017868593787715535, "loss": 46.0, "step": 643 }, { "epoch": 0.08794510259123962, "grad_norm": 0.0010645872680470347, "learning_rate": 0.00017862105236840777, "loss": 46.0, "step": 644 }, { "epoch": 0.088081663309549, "grad_norm": 0.0014955222140997648, "learning_rate": 0.00017855608006436558, "loss": 46.0, "step": 645 }, { "epoch": 0.08821822402785838, "grad_norm": 0.0011010526213794947, "learning_rate": 0.0001784910210367563, "loss": 46.0, "step": 646 }, { "epoch": 0.08835478474616776, "grad_norm": 0.0005369166028685868, "learning_rate": 0.00017842587535740314, "loss": 46.0, "step": 647 }, { "epoch": 0.08849134546447714, "grad_norm": 0.0007034659502096474, "learning_rate": 0.00017836064309822503, "loss": 46.0, "step": 648 }, { "epoch": 0.08862790618278653, "grad_norm": 0.0008996776887215674, "learning_rate": 0.00017829532433123635, "loss": 46.0, "step": 649 }, { "epoch": 0.0887644669010959, "grad_norm": 0.001349107245914638, "learning_rate": 0.00017822991912854713, "loss": 46.0, "step": 650 }, { "epoch": 0.08890102761940528, "grad_norm": 0.00062900735065341, "learning_rate": 0.00017816442756236276, "loss": 46.0, "step": 651 }, { "epoch": 0.08903758833771466, "grad_norm": 0.000961163139436394, "learning_rate": 0.00017809884970498396, "loss": 46.0, "step": 652 }, { "epoch": 0.08917414905602404, "grad_norm": 0.0008951184572651982, "learning_rate": 0.00017803318562880673, "loss": 46.0, "step": 653 }, { "epoch": 0.08931070977433342, "grad_norm": 0.000987289589829743, "learning_rate": 0.00017796743540632223, "loss": 46.0, "step": 654 }, { "epoch": 0.0894472704926428, "grad_norm": 0.0005511721828952432, "learning_rate": 0.0001779015991101168, "loss": 46.0, "step": 655 }, { "epoch": 0.08958383121095218, "grad_norm": 0.0008138107368722558, "learning_rate": 0.00017783567681287166, "loss": 46.0, "step": 656 }, { "epoch": 0.08972039192926155, "grad_norm": 0.0005222151521593332, "learning_rate": 0.00017776966858736314, "loss": 46.0, "step": 657 }, { "epoch": 0.08985695264757093, "grad_norm": 0.0005912959459237754, "learning_rate": 0.00017770357450646232, "loss": 46.0, "step": 658 }, { "epoch": 0.08999351336588031, "grad_norm": 0.00042330275755375624, "learning_rate": 0.00017763739464313506, "loss": 46.0, "step": 659 }, { "epoch": 0.09013007408418969, "grad_norm": 0.0006147885578684509, "learning_rate": 0.000177571129070442, "loss": 46.0, "step": 660 }, { "epoch": 0.09026663480249907, "grad_norm": 0.0013564558466896415, "learning_rate": 0.00017750477786153832, "loss": 46.0, "step": 661 }, { "epoch": 0.09040319552080844, "grad_norm": 0.0008140000863932073, "learning_rate": 0.0001774383410896738, "loss": 46.0, "step": 662 }, { "epoch": 0.09053975623911782, "grad_norm": 0.0005956863751634955, "learning_rate": 0.00017737181882819262, "loss": 46.0, "step": 663 }, { "epoch": 0.0906763169574272, "grad_norm": 0.0005575847462750971, "learning_rate": 0.0001773052111505334, "loss": 46.0, "step": 664 }, { "epoch": 0.09081287767573658, "grad_norm": 0.00038412457797676325, "learning_rate": 0.00017723851813022901, "loss": 46.0, "step": 665 }, { "epoch": 0.09094943839404596, "grad_norm": 0.0009578867466188967, "learning_rate": 0.00017717173984090658, "loss": 46.0, "step": 666 }, { "epoch": 0.09108599911235533, "grad_norm": 0.0004775691486429423, "learning_rate": 0.00017710487635628735, "loss": 46.0, "step": 667 }, { "epoch": 0.09122255983066471, "grad_norm": 0.0006319622043520212, "learning_rate": 0.00017703792775018655, "loss": 46.0, "step": 668 }, { "epoch": 0.09135912054897409, "grad_norm": 0.0006505327764898539, "learning_rate": 0.0001769708940965135, "loss": 46.0, "step": 669 }, { "epoch": 0.09149568126728347, "grad_norm": 0.00035890881554223597, "learning_rate": 0.00017690377546927133, "loss": 46.0, "step": 670 }, { "epoch": 0.09163224198559285, "grad_norm": 0.0005791863077320158, "learning_rate": 0.000176836571942557, "loss": 46.0, "step": 671 }, { "epoch": 0.09176880270390222, "grad_norm": 0.0005439603119157255, "learning_rate": 0.00017676928359056123, "loss": 46.0, "step": 672 }, { "epoch": 0.0919053634222116, "grad_norm": 0.0006497172289527953, "learning_rate": 0.0001767019104875683, "loss": 46.0, "step": 673 }, { "epoch": 0.09204192414052098, "grad_norm": 0.0004997221985831857, "learning_rate": 0.00017663445270795611, "loss": 46.0, "step": 674 }, { "epoch": 0.09217848485883036, "grad_norm": 0.0010348627110943198, "learning_rate": 0.0001765669103261961, "loss": 46.0, "step": 675 }, { "epoch": 0.09231504557713974, "grad_norm": 0.0006982755148783326, "learning_rate": 0.00017649928341685297, "loss": 46.0, "step": 676 }, { "epoch": 0.09245160629544912, "grad_norm": 0.0008973072981461883, "learning_rate": 0.00017643157205458483, "loss": 46.0, "step": 677 }, { "epoch": 0.0925881670137585, "grad_norm": 0.0009211792494170368, "learning_rate": 0.00017636377631414303, "loss": 46.0, "step": 678 }, { "epoch": 0.09272472773206787, "grad_norm": 0.0013491098070517182, "learning_rate": 0.00017629589627037203, "loss": 46.0, "step": 679 }, { "epoch": 0.09286128845037725, "grad_norm": 0.0011835723416879773, "learning_rate": 0.00017622793199820934, "loss": 46.0, "step": 680 }, { "epoch": 0.09299784916868663, "grad_norm": 0.0003937036672141403, "learning_rate": 0.0001761598835726855, "loss": 46.0, "step": 681 }, { "epoch": 0.093134409886996, "grad_norm": 0.00029390319832600653, "learning_rate": 0.00017609175106892395, "loss": 46.0, "step": 682 }, { "epoch": 0.09327097060530538, "grad_norm": 0.0005982140428386629, "learning_rate": 0.00017602353456214095, "loss": 46.0, "step": 683 }, { "epoch": 0.09340753132361476, "grad_norm": 0.0007088780985213816, "learning_rate": 0.00017595523412764549, "loss": 46.0, "step": 684 }, { "epoch": 0.09354409204192414, "grad_norm": 0.000541395100299269, "learning_rate": 0.0001758868498408392, "loss": 46.0, "step": 685 }, { "epoch": 0.09368065276023352, "grad_norm": 0.0010750001529231668, "learning_rate": 0.0001758183817772163, "loss": 46.0, "step": 686 }, { "epoch": 0.0938172134785429, "grad_norm": 0.0009917879942804575, "learning_rate": 0.00017574983001236345, "loss": 46.0, "step": 687 }, { "epoch": 0.09395377419685227, "grad_norm": 0.0007594460621476173, "learning_rate": 0.00017568119462195978, "loss": 46.0, "step": 688 }, { "epoch": 0.09409033491516165, "grad_norm": 0.0013289398048073053, "learning_rate": 0.00017561247568177672, "loss": 46.0, "step": 689 }, { "epoch": 0.09422689563347103, "grad_norm": 0.002443774603307247, "learning_rate": 0.00017554367326767792, "loss": 46.0, "step": 690 }, { "epoch": 0.09436345635178041, "grad_norm": 0.000834045116789639, "learning_rate": 0.0001754747874556192, "loss": 46.0, "step": 691 }, { "epoch": 0.09450001707008979, "grad_norm": 0.0008974446100182831, "learning_rate": 0.0001754058183216484, "loss": 46.0, "step": 692 }, { "epoch": 0.09463657778839916, "grad_norm": 0.0006684563704766333, "learning_rate": 0.00017533676594190544, "loss": 46.0, "step": 693 }, { "epoch": 0.09477313850670854, "grad_norm": 0.0005444415146484971, "learning_rate": 0.00017526763039262206, "loss": 46.0, "step": 694 }, { "epoch": 0.09490969922501792, "grad_norm": 0.00041295934352092445, "learning_rate": 0.00017519841175012184, "loss": 46.0, "step": 695 }, { "epoch": 0.0950462599433273, "grad_norm": 0.0014025474665686488, "learning_rate": 0.0001751291100908201, "loss": 46.0, "step": 696 }, { "epoch": 0.09518282066163668, "grad_norm": 0.0016710846684873104, "learning_rate": 0.0001750597254912238, "loss": 46.0, "step": 697 }, { "epoch": 0.09531938137994606, "grad_norm": 0.002275792183354497, "learning_rate": 0.00017499025802793146, "loss": 46.0, "step": 698 }, { "epoch": 0.09545594209825543, "grad_norm": 0.001069681253284216, "learning_rate": 0.0001749207077776331, "loss": 46.0, "step": 699 }, { "epoch": 0.09559250281656481, "grad_norm": 0.0005686444346792996, "learning_rate": 0.00017485107481711012, "loss": 46.0, "step": 700 }, { "epoch": 0.09572906353487419, "grad_norm": 0.0004446564707905054, "learning_rate": 0.00017478135922323522, "loss": 46.0, "step": 701 }, { "epoch": 0.09586562425318357, "grad_norm": 0.0005210356903262436, "learning_rate": 0.00017471156107297232, "loss": 46.0, "step": 702 }, { "epoch": 0.09600218497149295, "grad_norm": 0.0006958620506338775, "learning_rate": 0.00017464168044337654, "loss": 46.0, "step": 703 }, { "epoch": 0.09613874568980232, "grad_norm": 0.000465748249553144, "learning_rate": 0.00017457171741159394, "loss": 46.0, "step": 704 }, { "epoch": 0.0962753064081117, "grad_norm": 0.0003542457125149667, "learning_rate": 0.0001745016720548617, "loss": 46.0, "step": 705 }, { "epoch": 0.09641186712642108, "grad_norm": 0.0006130662513896823, "learning_rate": 0.00017443154445050775, "loss": 46.0, "step": 706 }, { "epoch": 0.09654842784473046, "grad_norm": 0.0005357970949262381, "learning_rate": 0.00017436133467595087, "loss": 46.0, "step": 707 }, { "epoch": 0.09668498856303984, "grad_norm": 0.0003605932288337499, "learning_rate": 0.00017429104280870057, "loss": 46.0, "step": 708 }, { "epoch": 0.09682154928134921, "grad_norm": 0.0004822358023375273, "learning_rate": 0.00017422066892635696, "loss": 46.0, "step": 709 }, { "epoch": 0.09695810999965859, "grad_norm": 0.000799324014224112, "learning_rate": 0.0001741502131066107, "loss": 46.0, "step": 710 }, { "epoch": 0.09709467071796797, "grad_norm": 0.0005530801718123257, "learning_rate": 0.00017407967542724297, "loss": 46.0, "step": 711 }, { "epoch": 0.09723123143627735, "grad_norm": 0.0008446394931524992, "learning_rate": 0.0001740090559661252, "loss": 46.0, "step": 712 }, { "epoch": 0.09736779215458673, "grad_norm": 0.0004706636827904731, "learning_rate": 0.0001739383548012192, "loss": 46.0, "step": 713 }, { "epoch": 0.0975043528728961, "grad_norm": 0.0003858576819766313, "learning_rate": 0.00017386757201057692, "loss": 46.0, "step": 714 }, { "epoch": 0.09764091359120548, "grad_norm": 0.0006433392409235239, "learning_rate": 0.00017379670767234045, "loss": 46.0, "step": 715 }, { "epoch": 0.09777747430951486, "grad_norm": 0.00046265096170827746, "learning_rate": 0.0001737257618647419, "loss": 46.0, "step": 716 }, { "epoch": 0.09791403502782425, "grad_norm": 0.0004183817654848099, "learning_rate": 0.00017365473466610337, "loss": 46.0, "step": 717 }, { "epoch": 0.09805059574613363, "grad_norm": 0.0005469456664286554, "learning_rate": 0.0001735836261548367, "loss": 46.0, "step": 718 }, { "epoch": 0.09818715646444301, "grad_norm": 0.000383463193429634, "learning_rate": 0.00017351243640944362, "loss": 46.0, "step": 719 }, { "epoch": 0.09832371718275239, "grad_norm": 0.0005470316973514855, "learning_rate": 0.00017344116550851543, "loss": 46.0, "step": 720 }, { "epoch": 0.09846027790106177, "grad_norm": 0.00037759976112283766, "learning_rate": 0.00017336981353073314, "loss": 46.0, "step": 721 }, { "epoch": 0.09859683861937114, "grad_norm": 0.0006073216791264713, "learning_rate": 0.00017329838055486717, "loss": 46.0, "step": 722 }, { "epoch": 0.09873339933768052, "grad_norm": 0.0004972846945747733, "learning_rate": 0.00017322686665977737, "loss": 46.0, "step": 723 }, { "epoch": 0.0988699600559899, "grad_norm": 0.0004591084725689143, "learning_rate": 0.00017315527192441298, "loss": 46.0, "step": 724 }, { "epoch": 0.09900652077429928, "grad_norm": 0.0008169691427610815, "learning_rate": 0.00017308359642781242, "loss": 46.0, "step": 725 }, { "epoch": 0.09914308149260866, "grad_norm": 0.0005359476199373603, "learning_rate": 0.00017301184024910333, "loss": 46.0, "step": 726 }, { "epoch": 0.09927964221091803, "grad_norm": 0.0005213018739596009, "learning_rate": 0.0001729400034675024, "loss": 46.0, "step": 727 }, { "epoch": 0.09941620292922741, "grad_norm": 0.0006647670525126159, "learning_rate": 0.00017286808616231522, "loss": 46.0, "step": 728 }, { "epoch": 0.09955276364753679, "grad_norm": 0.0007033472065813839, "learning_rate": 0.00017279608841293639, "loss": 46.0, "step": 729 }, { "epoch": 0.09968932436584617, "grad_norm": 0.0009473874815739691, "learning_rate": 0.00017272401029884933, "loss": 46.0, "step": 730 }, { "epoch": 0.09982588508415555, "grad_norm": 0.0005871194880455732, "learning_rate": 0.00017265185189962608, "loss": 46.0, "step": 731 }, { "epoch": 0.09996244580246492, "grad_norm": 0.00048681360203772783, "learning_rate": 0.00017257961329492728, "loss": 46.0, "step": 732 }, { "epoch": 0.1000990065207743, "grad_norm": 0.0005745171220041811, "learning_rate": 0.00017250729456450234, "loss": 46.0, "step": 733 }, { "epoch": 0.10023556723908368, "grad_norm": 0.00050855748122558, "learning_rate": 0.0001724348957881889, "loss": 46.0, "step": 734 }, { "epoch": 0.10037212795739306, "grad_norm": 0.00042842066613957286, "learning_rate": 0.00017236241704591304, "loss": 46.0, "step": 735 }, { "epoch": 0.10050868867570244, "grad_norm": 0.0010201798286288977, "learning_rate": 0.00017228985841768915, "loss": 46.0, "step": 736 }, { "epoch": 0.10064524939401182, "grad_norm": 0.0005850894376635551, "learning_rate": 0.00017221721998361976, "loss": 46.0, "step": 737 }, { "epoch": 0.1007818101123212, "grad_norm": 0.0009064356563612819, "learning_rate": 0.00017214450182389559, "loss": 46.0, "step": 738 }, { "epoch": 0.10091837083063057, "grad_norm": 0.0015590882394462824, "learning_rate": 0.00017207170401879526, "loss": 46.0, "step": 739 }, { "epoch": 0.10105493154893995, "grad_norm": 0.0011711895931512117, "learning_rate": 0.00017199882664868538, "loss": 46.0, "step": 740 }, { "epoch": 0.10119149226724933, "grad_norm": 0.0029380624182522297, "learning_rate": 0.00017192586979402044, "loss": 46.0, "step": 741 }, { "epoch": 0.1013280529855587, "grad_norm": 0.0007488722330890596, "learning_rate": 0.0001718528335353426, "loss": 46.0, "step": 742 }, { "epoch": 0.10146461370386808, "grad_norm": 0.0006716151256114244, "learning_rate": 0.00017177971795328167, "loss": 46.0, "step": 743 }, { "epoch": 0.10160117442217746, "grad_norm": 0.0008320304332301021, "learning_rate": 0.00017170652312855514, "loss": 46.0, "step": 744 }, { "epoch": 0.10173773514048684, "grad_norm": 0.002169104292988777, "learning_rate": 0.00017163324914196783, "loss": 46.0, "step": 745 }, { "epoch": 0.10187429585879622, "grad_norm": 0.0008557327091693878, "learning_rate": 0.00017155989607441213, "loss": 46.0, "step": 746 }, { "epoch": 0.1020108565771056, "grad_norm": 0.0009351296466775239, "learning_rate": 0.00017148646400686753, "loss": 46.0, "step": 747 }, { "epoch": 0.10214741729541497, "grad_norm": 0.0019438541494309902, "learning_rate": 0.00017141295302040095, "loss": 46.0, "step": 748 }, { "epoch": 0.10228397801372435, "grad_norm": 0.0014289816608652472, "learning_rate": 0.00017133936319616626, "loss": 46.0, "step": 749 }, { "epoch": 0.10242053873203373, "grad_norm": 0.003017352195456624, "learning_rate": 0.00017126569461540443, "loss": 46.0, "step": 750 }, { "epoch": 0.10255709945034311, "grad_norm": 0.0006331245531328022, "learning_rate": 0.00017119194735944337, "loss": 46.0, "step": 751 }, { "epoch": 0.10269366016865249, "grad_norm": 0.0009659952484071255, "learning_rate": 0.0001711181215096979, "loss": 46.0, "step": 752 }, { "epoch": 0.10283022088696187, "grad_norm": 0.0003564142098184675, "learning_rate": 0.00017104421714766947, "loss": 46.0, "step": 753 }, { "epoch": 0.10296678160527124, "grad_norm": 0.0004242552968207747, "learning_rate": 0.00017097023435494636, "loss": 46.0, "step": 754 }, { "epoch": 0.10310334232358062, "grad_norm": 0.00037972754216752946, "learning_rate": 0.00017089617321320335, "loss": 46.0, "step": 755 }, { "epoch": 0.10323990304189, "grad_norm": 0.00022272802016232163, "learning_rate": 0.0001708220338042017, "loss": 46.0, "step": 756 }, { "epoch": 0.10337646376019938, "grad_norm": 0.0004986607236787677, "learning_rate": 0.0001707478162097891, "loss": 46.0, "step": 757 }, { "epoch": 0.10351302447850876, "grad_norm": 0.0006160014308989048, "learning_rate": 0.00017067352051189965, "loss": 46.0, "step": 758 }, { "epoch": 0.10364958519681813, "grad_norm": 0.0006992130074650049, "learning_rate": 0.0001705991467925535, "loss": 46.0, "step": 759 }, { "epoch": 0.10378614591512751, "grad_norm": 0.0003626852994784713, "learning_rate": 0.000170524695133857, "loss": 46.0, "step": 760 }, { "epoch": 0.10392270663343689, "grad_norm": 0.0008579789428040385, "learning_rate": 0.0001704501656180026, "loss": 46.0, "step": 761 }, { "epoch": 0.10405926735174627, "grad_norm": 0.0009895421098917723, "learning_rate": 0.00017037555832726865, "loss": 46.0, "step": 762 }, { "epoch": 0.10419582807005565, "grad_norm": 0.0005099592381156981, "learning_rate": 0.00017030087334401936, "loss": 46.0, "step": 763 }, { "epoch": 0.10433238878836502, "grad_norm": 0.0006343593122437596, "learning_rate": 0.00017022611075070474, "loss": 46.0, "step": 764 }, { "epoch": 0.1044689495066744, "grad_norm": 0.0002918439276982099, "learning_rate": 0.00017015127062986043, "loss": 46.0, "step": 765 }, { "epoch": 0.10460551022498378, "grad_norm": 0.00021448293409775943, "learning_rate": 0.00017007635306410775, "loss": 46.0, "step": 766 }, { "epoch": 0.10474207094329316, "grad_norm": 0.0003549446410033852, "learning_rate": 0.00017000135813615338, "loss": 46.0, "step": 767 }, { "epoch": 0.10487863166160254, "grad_norm": 0.0008276899461634457, "learning_rate": 0.00016992628592878956, "loss": 46.0, "step": 768 }, { "epoch": 0.10501519237991191, "grad_norm": 0.0006797234527766705, "learning_rate": 0.00016985113652489374, "loss": 46.0, "step": 769 }, { "epoch": 0.10515175309822129, "grad_norm": 0.0012585432268679142, "learning_rate": 0.00016977591000742854, "loss": 46.0, "step": 770 }, { "epoch": 0.10528831381653067, "grad_norm": 0.0007930306601338089, "learning_rate": 0.0001697006064594419, "loss": 46.0, "step": 771 }, { "epoch": 0.10542487453484005, "grad_norm": 0.0008263712516054511, "learning_rate": 0.00016962522596406662, "loss": 46.0, "step": 772 }, { "epoch": 0.10556143525314943, "grad_norm": 0.0004895622842013836, "learning_rate": 0.00016954976860452054, "loss": 46.0, "step": 773 }, { "epoch": 0.1056979959714588, "grad_norm": 0.000630986993201077, "learning_rate": 0.00016947423446410636, "loss": 46.0, "step": 774 }, { "epoch": 0.10583455668976818, "grad_norm": 0.0006404675077646971, "learning_rate": 0.00016939862362621146, "loss": 46.0, "step": 775 }, { "epoch": 0.10597111740807756, "grad_norm": 0.0023967279121279716, "learning_rate": 0.00016932293617430796, "loss": 46.0, "step": 776 }, { "epoch": 0.10610767812638694, "grad_norm": 0.0005658793961629272, "learning_rate": 0.0001692471721919526, "loss": 46.0, "step": 777 }, { "epoch": 0.10624423884469632, "grad_norm": 0.0009352597990073264, "learning_rate": 0.00016917133176278648, "loss": 46.0, "step": 778 }, { "epoch": 0.1063807995630057, "grad_norm": 0.0011438351357355714, "learning_rate": 0.00016909541497053522, "loss": 46.0, "step": 779 }, { "epoch": 0.10651736028131507, "grad_norm": 0.0010559734655544162, "learning_rate": 0.00016901942189900867, "loss": 46.0, "step": 780 }, { "epoch": 0.10665392099962445, "grad_norm": 0.0005015085334889591, "learning_rate": 0.0001689433526321009, "loss": 46.0, "step": 781 }, { "epoch": 0.10679048171793383, "grad_norm": 0.0003511472314130515, "learning_rate": 0.0001688672072537902, "loss": 46.0, "step": 782 }, { "epoch": 0.10692704243624321, "grad_norm": 0.000647745851892978, "learning_rate": 0.00016879098584813865, "loss": 46.0, "step": 783 }, { "epoch": 0.10706360315455259, "grad_norm": 0.0014171921648085117, "learning_rate": 0.00016871468849929253, "loss": 46.0, "step": 784 }, { "epoch": 0.10720016387286196, "grad_norm": 0.0005998522392474115, "learning_rate": 0.00016863831529148176, "loss": 46.0, "step": 785 }, { "epoch": 0.10733672459117136, "grad_norm": 0.0008481833501718938, "learning_rate": 0.00016856186630902014, "loss": 46.0, "step": 786 }, { "epoch": 0.10747328530948073, "grad_norm": 0.001625437056645751, "learning_rate": 0.00016848534163630498, "loss": 46.0, "step": 787 }, { "epoch": 0.10760984602779011, "grad_norm": 0.004598119296133518, "learning_rate": 0.0001684087413578173, "loss": 46.0, "step": 788 }, { "epoch": 0.10774640674609949, "grad_norm": 0.0032096696086227894, "learning_rate": 0.00016833206555812153, "loss": 46.0, "step": 789 }, { "epoch": 0.10788296746440887, "grad_norm": 0.00042951159412041306, "learning_rate": 0.00016825531432186543, "loss": 46.0, "step": 790 }, { "epoch": 0.10801952818271825, "grad_norm": 0.0017844205722212791, "learning_rate": 0.00016817848773378007, "loss": 46.0, "step": 791 }, { "epoch": 0.10815608890102762, "grad_norm": 0.0006350985495373607, "learning_rate": 0.00016810158587867973, "loss": 46.0, "step": 792 }, { "epoch": 0.108292649619337, "grad_norm": 0.0009360946132801473, "learning_rate": 0.00016802460884146175, "loss": 46.0, "step": 793 }, { "epoch": 0.10842921033764638, "grad_norm": 0.00030124749173410237, "learning_rate": 0.0001679475567071065, "loss": 46.0, "step": 794 }, { "epoch": 0.10856577105595576, "grad_norm": 0.00164910894818604, "learning_rate": 0.0001678704295606772, "loss": 46.0, "step": 795 }, { "epoch": 0.10870233177426514, "grad_norm": 0.0012204207014292479, "learning_rate": 0.00016779322748731995, "loss": 46.0, "step": 796 }, { "epoch": 0.10883889249257452, "grad_norm": 0.0007130260928533971, "learning_rate": 0.0001677159505722635, "loss": 46.0, "step": 797 }, { "epoch": 0.1089754532108839, "grad_norm": 0.0006258345092646778, "learning_rate": 0.0001676385989008193, "loss": 46.0, "step": 798 }, { "epoch": 0.10911201392919327, "grad_norm": 0.0013600183883681893, "learning_rate": 0.00016756117255838128, "loss": 46.0, "step": 799 }, { "epoch": 0.10924857464750265, "grad_norm": 0.001245712861418724, "learning_rate": 0.00016748367163042576, "loss": 46.0, "step": 800 }, { "epoch": 0.10938513536581203, "grad_norm": 0.00043424879550002515, "learning_rate": 0.0001674060962025115, "loss": 46.0, "step": 801 }, { "epoch": 0.1095216960841214, "grad_norm": 0.00040041119791567326, "learning_rate": 0.00016732844636027948, "loss": 46.0, "step": 802 }, { "epoch": 0.10965825680243078, "grad_norm": 0.00047825041110627353, "learning_rate": 0.00016725072218945272, "loss": 46.0, "step": 803 }, { "epoch": 0.10979481752074016, "grad_norm": 0.0006943688495084643, "learning_rate": 0.00016717292377583647, "loss": 46.0, "step": 804 }, { "epoch": 0.10993137823904954, "grad_norm": 0.00046805053716525435, "learning_rate": 0.00016709505120531782, "loss": 46.0, "step": 805 }, { "epoch": 0.11006793895735892, "grad_norm": 0.001021806849166751, "learning_rate": 0.00016701710456386572, "loss": 46.0, "step": 806 }, { "epoch": 0.1102044996756683, "grad_norm": 0.000839448010083288, "learning_rate": 0.000166939083937531, "loss": 46.0, "step": 807 }, { "epoch": 0.11034106039397767, "grad_norm": 0.0007610208704136312, "learning_rate": 0.0001668609894124461, "loss": 46.0, "step": 808 }, { "epoch": 0.11047762111228705, "grad_norm": 0.0008387729176320136, "learning_rate": 0.00016678282107482502, "loss": 46.0, "step": 809 }, { "epoch": 0.11061418183059643, "grad_norm": 0.0005413633771240711, "learning_rate": 0.00016670457901096328, "loss": 46.0, "step": 810 }, { "epoch": 0.11075074254890581, "grad_norm": 0.0008596319821663201, "learning_rate": 0.0001666262633072378, "loss": 46.0, "step": 811 }, { "epoch": 0.11088730326721519, "grad_norm": 0.0005809023859910667, "learning_rate": 0.0001665478740501067, "loss": 46.0, "step": 812 }, { "epoch": 0.11102386398552457, "grad_norm": 0.0005002027610316873, "learning_rate": 0.00016646941132610947, "loss": 46.0, "step": 813 }, { "epoch": 0.11116042470383394, "grad_norm": 0.0005074172513559461, "learning_rate": 0.0001663908752218666, "loss": 46.0, "step": 814 }, { "epoch": 0.11129698542214332, "grad_norm": 0.00038702471647411585, "learning_rate": 0.00016631226582407952, "loss": 46.0, "step": 815 }, { "epoch": 0.1114335461404527, "grad_norm": 0.0005355363246053457, "learning_rate": 0.00016623358321953078, "loss": 46.0, "step": 816 }, { "epoch": 0.11157010685876208, "grad_norm": 0.000400405318941921, "learning_rate": 0.00016615482749508356, "loss": 46.0, "step": 817 }, { "epoch": 0.11170666757707146, "grad_norm": 0.0003890878870151937, "learning_rate": 0.00016607599873768182, "loss": 46.0, "step": 818 }, { "epoch": 0.11184322829538083, "grad_norm": 0.0006321795517578721, "learning_rate": 0.0001659970970343502, "loss": 46.0, "step": 819 }, { "epoch": 0.11197978901369021, "grad_norm": 0.0007353540859185159, "learning_rate": 0.00016591812247219377, "loss": 46.0, "step": 820 }, { "epoch": 0.11211634973199959, "grad_norm": 0.0012658998603001237, "learning_rate": 0.00016583907513839817, "loss": 46.0, "step": 821 }, { "epoch": 0.11225291045030897, "grad_norm": 0.0012359356041997671, "learning_rate": 0.00016575995512022921, "loss": 46.0, "step": 822 }, { "epoch": 0.11238947116861835, "grad_norm": 0.00024977774592116475, "learning_rate": 0.00016568076250503304, "loss": 46.0, "step": 823 }, { "epoch": 0.11252603188692772, "grad_norm": 0.0007921140058897436, "learning_rate": 0.000165601497380236, "loss": 46.0, "step": 824 }, { "epoch": 0.1126625926052371, "grad_norm": 0.00028611160814762115, "learning_rate": 0.00016552215983334437, "loss": 46.0, "step": 825 }, { "epoch": 0.11279915332354648, "grad_norm": 0.00045781530207023025, "learning_rate": 0.00016544274995194448, "loss": 46.0, "step": 826 }, { "epoch": 0.11293571404185586, "grad_norm": 0.0010054416488856077, "learning_rate": 0.0001653632678237024, "loss": 46.0, "step": 827 }, { "epoch": 0.11307227476016524, "grad_norm": 0.0003318900417070836, "learning_rate": 0.00016528371353636406, "loss": 46.0, "step": 828 }, { "epoch": 0.11320883547847461, "grad_norm": 0.0006165113882161677, "learning_rate": 0.00016520408717775507, "loss": 46.0, "step": 829 }, { "epoch": 0.11334539619678399, "grad_norm": 0.0005957252578809857, "learning_rate": 0.00016512438883578044, "loss": 46.0, "step": 830 }, { "epoch": 0.11348195691509337, "grad_norm": 0.0002892489719670266, "learning_rate": 0.00016504461859842486, "loss": 46.0, "step": 831 }, { "epoch": 0.11361851763340275, "grad_norm": 0.00041660640272311866, "learning_rate": 0.00016496477655375227, "loss": 46.0, "step": 832 }, { "epoch": 0.11375507835171213, "grad_norm": 0.00023521836556028575, "learning_rate": 0.00016488486278990586, "loss": 46.0, "step": 833 }, { "epoch": 0.1138916390700215, "grad_norm": 0.0009512827964499593, "learning_rate": 0.00016480487739510807, "loss": 46.0, "step": 834 }, { "epoch": 0.11402819978833088, "grad_norm": 0.0008759453776292503, "learning_rate": 0.00016472482045766043, "loss": 46.0, "step": 835 }, { "epoch": 0.11416476050664026, "grad_norm": 0.000294568162644282, "learning_rate": 0.00016464469206594332, "loss": 46.0, "step": 836 }, { "epoch": 0.11430132122494964, "grad_norm": 0.0007791619864292443, "learning_rate": 0.00016456449230841617, "loss": 46.0, "step": 837 }, { "epoch": 0.11443788194325902, "grad_norm": 0.0015861823922023177, "learning_rate": 0.00016448422127361706, "loss": 46.0, "step": 838 }, { "epoch": 0.1145744426615684, "grad_norm": 0.0011900209356099367, "learning_rate": 0.00016440387905016285, "loss": 46.0, "step": 839 }, { "epoch": 0.11471100337987777, "grad_norm": 0.0017578421393409371, "learning_rate": 0.00016432346572674896, "loss": 46.0, "step": 840 }, { "epoch": 0.11484756409818715, "grad_norm": 0.0018410708289593458, "learning_rate": 0.00016424298139214929, "loss": 46.0, "step": 841 }, { "epoch": 0.11498412481649653, "grad_norm": 0.00044194411020725965, "learning_rate": 0.0001641624261352161, "loss": 46.0, "step": 842 }, { "epoch": 0.11512068553480591, "grad_norm": 0.0007466517854481936, "learning_rate": 0.00016408180004488007, "loss": 46.0, "step": 843 }, { "epoch": 0.11525724625311529, "grad_norm": 0.0008741529891267419, "learning_rate": 0.00016400110321014992, "loss": 46.0, "step": 844 }, { "epoch": 0.11539380697142466, "grad_norm": 0.0008493126952089369, "learning_rate": 0.00016392033572011261, "loss": 46.0, "step": 845 }, { "epoch": 0.11553036768973404, "grad_norm": 0.0007634422508999705, "learning_rate": 0.000163839497663933, "loss": 46.0, "step": 846 }, { "epoch": 0.11566692840804342, "grad_norm": 0.0005714827566407621, "learning_rate": 0.0001637585891308539, "loss": 46.0, "step": 847 }, { "epoch": 0.1158034891263528, "grad_norm": 0.0008292211568914354, "learning_rate": 0.0001636776102101959, "loss": 46.0, "step": 848 }, { "epoch": 0.11594004984466218, "grad_norm": 0.00044870973215438426, "learning_rate": 0.00016359656099135733, "loss": 46.0, "step": 849 }, { "epoch": 0.11607661056297155, "grad_norm": 0.0012383662397041917, "learning_rate": 0.00016351544156381414, "loss": 46.0, "step": 850 }, { "epoch": 0.11621317128128093, "grad_norm": 0.0004602587141562253, "learning_rate": 0.00016343425201711966, "loss": 46.0, "step": 851 }, { "epoch": 0.11634973199959031, "grad_norm": 0.0005955328815616667, "learning_rate": 0.00016335299244090478, "loss": 46.0, "step": 852 }, { "epoch": 0.11648629271789969, "grad_norm": 0.0005287445383146405, "learning_rate": 0.0001632716629248777, "loss": 46.0, "step": 853 }, { "epoch": 0.11662285343620908, "grad_norm": 0.0004926318651996553, "learning_rate": 0.0001631902635588237, "loss": 46.0, "step": 854 }, { "epoch": 0.11675941415451846, "grad_norm": 0.0010656327940523624, "learning_rate": 0.00016310879443260528, "loss": 46.0, "step": 855 }, { "epoch": 0.11689597487282784, "grad_norm": 0.0005354030872695148, "learning_rate": 0.00016302725563616192, "loss": 46.0, "step": 856 }, { "epoch": 0.11703253559113722, "grad_norm": 0.0005059898248873651, "learning_rate": 0.00016294564725951002, "loss": 46.0, "step": 857 }, { "epoch": 0.1171690963094466, "grad_norm": 0.0004201128613203764, "learning_rate": 0.0001628639693927428, "loss": 46.0, "step": 858 }, { "epoch": 0.11730565702775597, "grad_norm": 0.0003003796737175435, "learning_rate": 0.00016278222212603018, "loss": 46.0, "step": 859 }, { "epoch": 0.11744221774606535, "grad_norm": 0.0005390554433688521, "learning_rate": 0.00016270040554961868, "loss": 46.0, "step": 860 }, { "epoch": 0.11757877846437473, "grad_norm": 0.0015715021872892976, "learning_rate": 0.00016261851975383137, "loss": 46.0, "step": 861 }, { "epoch": 0.1177153391826841, "grad_norm": 0.0010319905122742057, "learning_rate": 0.00016253656482906776, "loss": 46.0, "step": 862 }, { "epoch": 0.11785189990099348, "grad_norm": 0.000347215129295364, "learning_rate": 0.0001624545408658036, "loss": 46.0, "step": 863 }, { "epoch": 0.11798846061930286, "grad_norm": 0.0006791274063289165, "learning_rate": 0.00016237244795459086, "loss": 46.0, "step": 864 }, { "epoch": 0.11812502133761224, "grad_norm": 0.00031977854087017477, "learning_rate": 0.00016229028618605775, "loss": 46.0, "step": 865 }, { "epoch": 0.11826158205592162, "grad_norm": 0.0037054885178804398, "learning_rate": 0.00016220805565090836, "loss": 46.0, "step": 866 }, { "epoch": 0.118398142774231, "grad_norm": 0.0005012017791159451, "learning_rate": 0.00016212575643992277, "loss": 46.0, "step": 867 }, { "epoch": 0.11853470349254037, "grad_norm": 0.00028602650854736567, "learning_rate": 0.00016204338864395684, "loss": 46.0, "step": 868 }, { "epoch": 0.11867126421084975, "grad_norm": 0.0006842486909590662, "learning_rate": 0.00016196095235394207, "loss": 46.0, "step": 869 }, { "epoch": 0.11880782492915913, "grad_norm": 0.000561655790079385, "learning_rate": 0.00016187844766088586, "loss": 46.0, "step": 870 }, { "epoch": 0.11894438564746851, "grad_norm": 0.002418296178802848, "learning_rate": 0.00016179587465587077, "loss": 46.0, "step": 871 }, { "epoch": 0.11908094636577789, "grad_norm": 0.00027192741981707513, "learning_rate": 0.00016171323343005498, "loss": 46.0, "step": 872 }, { "epoch": 0.11921750708408727, "grad_norm": 0.0004957255441695452, "learning_rate": 0.0001616305240746719, "loss": 46.0, "step": 873 }, { "epoch": 0.11935406780239664, "grad_norm": 0.00048530122148804367, "learning_rate": 0.00016154774668103027, "loss": 46.0, "step": 874 }, { "epoch": 0.11949062852070602, "grad_norm": 0.0009217667393386364, "learning_rate": 0.0001614649013405138, "loss": 46.0, "step": 875 }, { "epoch": 0.1196271892390154, "grad_norm": 0.0015625512460246682, "learning_rate": 0.0001613819881445813, "loss": 46.0, "step": 876 }, { "epoch": 0.11976374995732478, "grad_norm": 0.0009254494798369706, "learning_rate": 0.00016129900718476637, "loss": 46.0, "step": 877 }, { "epoch": 0.11990031067563416, "grad_norm": 0.00046171454596333206, "learning_rate": 0.00016121595855267767, "loss": 46.0, "step": 878 }, { "epoch": 0.12003687139394353, "grad_norm": 0.0003148307732772082, "learning_rate": 0.0001611328423399983, "loss": 46.0, "step": 879 }, { "epoch": 0.12017343211225291, "grad_norm": 0.0004730523796752095, "learning_rate": 0.00016104965863848617, "loss": 46.0, "step": 880 }, { "epoch": 0.12030999283056229, "grad_norm": 0.001140785519964993, "learning_rate": 0.00016096640753997346, "loss": 46.0, "step": 881 }, { "epoch": 0.12044655354887167, "grad_norm": 0.0007769656367599964, "learning_rate": 0.00016088308913636703, "loss": 46.0, "step": 882 }, { "epoch": 0.12058311426718105, "grad_norm": 0.000732703018002212, "learning_rate": 0.00016079970351964783, "loss": 46.0, "step": 883 }, { "epoch": 0.12071967498549042, "grad_norm": 0.0007818607264198363, "learning_rate": 0.00016071625078187114, "loss": 46.0, "step": 884 }, { "epoch": 0.1208562357037998, "grad_norm": 0.0006149871041998267, "learning_rate": 0.00016063273101516625, "loss": 46.0, "step": 885 }, { "epoch": 0.12099279642210918, "grad_norm": 0.0008054501377046108, "learning_rate": 0.00016054914431173654, "loss": 46.0, "step": 886 }, { "epoch": 0.12112935714041856, "grad_norm": 0.0008161257137544453, "learning_rate": 0.0001604654907638592, "loss": 46.0, "step": 887 }, { "epoch": 0.12126591785872794, "grad_norm": 0.0030461640562862158, "learning_rate": 0.00016038177046388523, "loss": 46.0, "step": 888 }, { "epoch": 0.12140247857703731, "grad_norm": 0.0007848728564567864, "learning_rate": 0.0001602979835042394, "loss": 46.0, "step": 889 }, { "epoch": 0.12153903929534669, "grad_norm": 0.005675900261849165, "learning_rate": 0.00016021412997741993, "loss": 46.0, "step": 890 }, { "epoch": 0.12167560001365607, "grad_norm": 0.00039349167491309345, "learning_rate": 0.0001601302099759987, "loss": 46.0, "step": 891 }, { "epoch": 0.12181216073196545, "grad_norm": 0.0006075625424273312, "learning_rate": 0.00016004622359262085, "loss": 46.0, "step": 892 }, { "epoch": 0.12194872145027483, "grad_norm": 0.0003772106138058007, "learning_rate": 0.0001599621709200048, "loss": 46.0, "step": 893 }, { "epoch": 0.1220852821685842, "grad_norm": 0.0015227465191856027, "learning_rate": 0.00015987805205094227, "loss": 46.0, "step": 894 }, { "epoch": 0.12222184288689358, "grad_norm": 0.00073139468440786, "learning_rate": 0.00015979386707829792, "loss": 46.0, "step": 895 }, { "epoch": 0.12235840360520296, "grad_norm": 0.0011622250312939286, "learning_rate": 0.00015970961609500944, "loss": 46.0, "step": 896 }, { "epoch": 0.12249496432351234, "grad_norm": 0.001449939445592463, "learning_rate": 0.00015962529919408746, "loss": 46.0, "step": 897 }, { "epoch": 0.12263152504182172, "grad_norm": 0.0005385968834161758, "learning_rate": 0.00015954091646861525, "loss": 46.0, "step": 898 }, { "epoch": 0.1227680857601311, "grad_norm": 0.0007052323780953884, "learning_rate": 0.00015945646801174886, "loss": 46.0, "step": 899 }, { "epoch": 0.12290464647844047, "grad_norm": 0.001713512814603746, "learning_rate": 0.0001593719539167169, "loss": 46.0, "step": 900 }, { "epoch": 0.12290464647844047, "eval_loss": 11.5, "eval_runtime": 20.6643, "eval_samples_per_second": 149.243, "eval_steps_per_second": 74.621, "step": 900 }, { "epoch": 0.12304120719674985, "grad_norm": 0.0009241271764039993, "learning_rate": 0.00015928737427682032, "loss": 46.0, "step": 901 }, { "epoch": 0.12317776791505923, "grad_norm": 0.0003521353646647185, "learning_rate": 0.00015920272918543257, "loss": 46.0, "step": 902 }, { "epoch": 0.12331432863336861, "grad_norm": 0.0004476907488424331, "learning_rate": 0.00015911801873599933, "loss": 46.0, "step": 903 }, { "epoch": 0.12345088935167799, "grad_norm": 0.0008625802001915872, "learning_rate": 0.00015903324302203836, "loss": 46.0, "step": 904 }, { "epoch": 0.12358745006998736, "grad_norm": 0.000347345310728997, "learning_rate": 0.00015894840213713952, "loss": 46.0, "step": 905 }, { "epoch": 0.12372401078829674, "grad_norm": 0.00042961168219335377, "learning_rate": 0.0001588634961749646, "loss": 46.0, "step": 906 }, { "epoch": 0.12386057150660612, "grad_norm": 0.0005479567334987223, "learning_rate": 0.00015877852522924732, "loss": 46.0, "step": 907 }, { "epoch": 0.1239971322249155, "grad_norm": 0.000546832219697535, "learning_rate": 0.00015869348939379302, "loss": 46.0, "step": 908 }, { "epoch": 0.12413369294322488, "grad_norm": 0.0005068538011983037, "learning_rate": 0.0001586083887624787, "loss": 46.0, "step": 909 }, { "epoch": 0.12427025366153426, "grad_norm": 0.0005834728945046663, "learning_rate": 0.00015852322342925295, "loss": 46.0, "step": 910 }, { "epoch": 0.12440681437984363, "grad_norm": 0.0004933670861646533, "learning_rate": 0.00015843799348813574, "loss": 46.0, "step": 911 }, { "epoch": 0.12454337509815301, "grad_norm": 0.0011595729738473892, "learning_rate": 0.0001583526990332184, "loss": 46.0, "step": 912 }, { "epoch": 0.12467993581646239, "grad_norm": 0.00029905018163844943, "learning_rate": 0.00015826734015866344, "loss": 46.0, "step": 913 }, { "epoch": 0.12481649653477177, "grad_norm": 0.000434244517236948, "learning_rate": 0.00015818191695870452, "loss": 46.0, "step": 914 }, { "epoch": 0.12495305725308115, "grad_norm": 0.0006040096050128341, "learning_rate": 0.00015809642952764632, "loss": 46.0, "step": 915 }, { "epoch": 0.12508961797139054, "grad_norm": 0.0007016469608061016, "learning_rate": 0.00015801087795986438, "loss": 46.0, "step": 916 }, { "epoch": 0.1252261786896999, "grad_norm": 0.0003513791016303003, "learning_rate": 0.0001579252623498051, "loss": 46.0, "step": 917 }, { "epoch": 0.1253627394080093, "grad_norm": 0.0003278540389146656, "learning_rate": 0.0001578395827919855, "loss": 46.0, "step": 918 }, { "epoch": 0.12549930012631866, "grad_norm": 0.000646027154289186, "learning_rate": 0.00015775383938099332, "loss": 46.0, "step": 919 }, { "epoch": 0.12563586084462805, "grad_norm": 0.0005723676295019686, "learning_rate": 0.00015766803221148673, "loss": 46.0, "step": 920 }, { "epoch": 0.12577242156293741, "grad_norm": 0.0015359300887212157, "learning_rate": 0.00015758216137819422, "loss": 46.0, "step": 921 }, { "epoch": 0.1259089822812468, "grad_norm": 0.00043970157275907695, "learning_rate": 0.0001574962269759147, "loss": 46.0, "step": 922 }, { "epoch": 0.12604554299955617, "grad_norm": 0.00028622214449569583, "learning_rate": 0.00015741022909951716, "loss": 46.0, "step": 923 }, { "epoch": 0.12618210371786556, "grad_norm": 0.00038283158210106194, "learning_rate": 0.00015732416784394065, "loss": 46.0, "step": 924 }, { "epoch": 0.12631866443617493, "grad_norm": 0.00039500248385593295, "learning_rate": 0.00015723804330419422, "loss": 46.0, "step": 925 }, { "epoch": 0.12645522515448432, "grad_norm": 0.0004930765135213733, "learning_rate": 0.00015715185557535689, "loss": 46.0, "step": 926 }, { "epoch": 0.12659178587279368, "grad_norm": 0.0005405626725405455, "learning_rate": 0.00015706560475257727, "loss": 46.0, "step": 927 }, { "epoch": 0.12672834659110307, "grad_norm": 0.00043443331378512084, "learning_rate": 0.00015697929093107365, "loss": 46.0, "step": 928 }, { "epoch": 0.12686490730941244, "grad_norm": 0.0005110527272336185, "learning_rate": 0.000156892914206134, "loss": 46.0, "step": 929 }, { "epoch": 0.12700146802772183, "grad_norm": 0.00031187915010377765, "learning_rate": 0.00015680647467311557, "loss": 46.0, "step": 930 }, { "epoch": 0.1271380287460312, "grad_norm": 0.0001981136156246066, "learning_rate": 0.00015671997242744511, "loss": 46.0, "step": 931 }, { "epoch": 0.1272745894643406, "grad_norm": 0.0026429896242916584, "learning_rate": 0.00015663340756461844, "loss": 46.0, "step": 932 }, { "epoch": 0.12741115018264995, "grad_norm": 0.0008942155982367694, "learning_rate": 0.0001565467801802006, "loss": 46.0, "step": 933 }, { "epoch": 0.12754771090095934, "grad_norm": 0.0006840588175691664, "learning_rate": 0.00015646009036982567, "loss": 46.0, "step": 934 }, { "epoch": 0.1276842716192687, "grad_norm": 0.0009611474233679473, "learning_rate": 0.00015637333822919656, "loss": 46.0, "step": 935 }, { "epoch": 0.1278208323375781, "grad_norm": 0.000530791119672358, "learning_rate": 0.00015628652385408508, "loss": 46.0, "step": 936 }, { "epoch": 0.12795739305588746, "grad_norm": 0.00041494445758871734, "learning_rate": 0.00015619964734033172, "loss": 46.0, "step": 937 }, { "epoch": 0.12809395377419686, "grad_norm": 0.0008938443497754633, "learning_rate": 0.00015611270878384552, "loss": 46.0, "step": 938 }, { "epoch": 0.12823051449250622, "grad_norm": 0.000739588460419327, "learning_rate": 0.00015602570828060407, "loss": 46.0, "step": 939 }, { "epoch": 0.1283670752108156, "grad_norm": 0.0025961471255868673, "learning_rate": 0.00015593864592665333, "loss": 46.0, "step": 940 }, { "epoch": 0.12850363592912498, "grad_norm": 0.00049801473505795, "learning_rate": 0.00015585152181810753, "loss": 46.0, "step": 941 }, { "epoch": 0.12864019664743437, "grad_norm": 0.000621246756054461, "learning_rate": 0.00015576433605114912, "loss": 46.0, "step": 942 }, { "epoch": 0.12877675736574373, "grad_norm": 0.0007328742649406195, "learning_rate": 0.00015567708872202854, "loss": 46.0, "step": 943 }, { "epoch": 0.12891331808405312, "grad_norm": 0.005625641904771328, "learning_rate": 0.00015558977992706426, "loss": 46.0, "step": 944 }, { "epoch": 0.1290498788023625, "grad_norm": 0.0005238280282355845, "learning_rate": 0.00015550240976264253, "loss": 46.0, "step": 945 }, { "epoch": 0.12918643952067188, "grad_norm": 0.001820914214476943, "learning_rate": 0.0001554149783252175, "loss": 46.0, "step": 946 }, { "epoch": 0.12932300023898124, "grad_norm": 0.0006223174277693033, "learning_rate": 0.0001553274857113108, "loss": 46.0, "step": 947 }, { "epoch": 0.12945956095729064, "grad_norm": 0.0015029089991003275, "learning_rate": 0.00015523993201751167, "loss": 46.0, "step": 948 }, { "epoch": 0.1295961216756, "grad_norm": 0.0012488181237131357, "learning_rate": 0.00015515231734047677, "loss": 46.0, "step": 949 }, { "epoch": 0.1297326823939094, "grad_norm": 0.0007277615368366241, "learning_rate": 0.0001550646417769301, "loss": 46.0, "step": 950 }, { "epoch": 0.12986924311221876, "grad_norm": 0.0024758039508014917, "learning_rate": 0.0001549769054236629, "loss": 46.0, "step": 951 }, { "epoch": 0.13000580383052815, "grad_norm": 0.00043562057544477284, "learning_rate": 0.00015488910837753342, "loss": 46.0, "step": 952 }, { "epoch": 0.1301423645488375, "grad_norm": 0.0006010486977174878, "learning_rate": 0.00015480125073546704, "loss": 46.0, "step": 953 }, { "epoch": 0.1302789252671469, "grad_norm": 0.00039981160080060363, "learning_rate": 0.0001547133325944559, "loss": 46.0, "step": 954 }, { "epoch": 0.13041548598545627, "grad_norm": 0.00056524045066908, "learning_rate": 0.00015462535405155902, "loss": 46.0, "step": 955 }, { "epoch": 0.13055204670376566, "grad_norm": 0.00019664541468955576, "learning_rate": 0.00015453731520390215, "loss": 46.0, "step": 956 }, { "epoch": 0.13068860742207505, "grad_norm": 0.000747493002563715, "learning_rate": 0.0001544492161486775, "loss": 46.0, "step": 957 }, { "epoch": 0.13082516814038442, "grad_norm": 0.00021618347091134638, "learning_rate": 0.00015436105698314384, "loss": 46.0, "step": 958 }, { "epoch": 0.1309617288586938, "grad_norm": 0.0005658991285599768, "learning_rate": 0.0001542728378046262, "loss": 46.0, "step": 959 }, { "epoch": 0.13109828957700317, "grad_norm": 0.0010320099536329508, "learning_rate": 0.00015418455871051592, "loss": 46.0, "step": 960 }, { "epoch": 0.13123485029531257, "grad_norm": 0.00037752182106487453, "learning_rate": 0.00015409621979827048, "loss": 46.0, "step": 961 }, { "epoch": 0.13137141101362193, "grad_norm": 0.0003615982714109123, "learning_rate": 0.0001540078211654135, "loss": 46.0, "step": 962 }, { "epoch": 0.13150797173193132, "grad_norm": 0.00041104850242845714, "learning_rate": 0.0001539193629095343, "loss": 46.0, "step": 963 }, { "epoch": 0.1316445324502407, "grad_norm": 0.0005330987041816115, "learning_rate": 0.00015383084512828824, "loss": 46.0, "step": 964 }, { "epoch": 0.13178109316855008, "grad_norm": 0.0003800500126089901, "learning_rate": 0.00015374226791939628, "loss": 46.0, "step": 965 }, { "epoch": 0.13191765388685944, "grad_norm": 0.0006545698852278292, "learning_rate": 0.000153653631380645, "loss": 46.0, "step": 966 }, { "epoch": 0.13205421460516883, "grad_norm": 0.0020852810703217983, "learning_rate": 0.0001535649356098865, "loss": 46.0, "step": 967 }, { "epoch": 0.1321907753234782, "grad_norm": 0.00031325622694566846, "learning_rate": 0.00015347618070503827, "loss": 46.0, "step": 968 }, { "epoch": 0.1323273360417876, "grad_norm": 0.00019234443607274443, "learning_rate": 0.0001533873667640831, "loss": 46.0, "step": 969 }, { "epoch": 0.13246389676009696, "grad_norm": 0.00038069483707658947, "learning_rate": 0.00015329849388506886, "loss": 46.0, "step": 970 }, { "epoch": 0.13260045747840635, "grad_norm": 0.00045511999633163214, "learning_rate": 0.00015320956216610866, "loss": 46.0, "step": 971 }, { "epoch": 0.1327370181967157, "grad_norm": 0.0005886334110982716, "learning_rate": 0.00015312057170538035, "loss": 46.0, "step": 972 }, { "epoch": 0.1328735789150251, "grad_norm": 0.0009905985789373517, "learning_rate": 0.00015303152260112682, "loss": 46.0, "step": 973 }, { "epoch": 0.13301013963333447, "grad_norm": 0.0010108448332175612, "learning_rate": 0.00015294241495165557, "loss": 46.0, "step": 974 }, { "epoch": 0.13314670035164386, "grad_norm": 0.0005389642901718616, "learning_rate": 0.00015285324885533884, "loss": 46.0, "step": 975 }, { "epoch": 0.13328326106995322, "grad_norm": 0.000410493987146765, "learning_rate": 0.0001527640244106133, "loss": 46.0, "step": 976 }, { "epoch": 0.13341982178826262, "grad_norm": 0.001756677869707346, "learning_rate": 0.00015267474171598005, "loss": 46.0, "step": 977 }, { "epoch": 0.13355638250657198, "grad_norm": 0.0003254815237596631, "learning_rate": 0.0001525854008700046, "loss": 46.0, "step": 978 }, { "epoch": 0.13369294322488137, "grad_norm": 0.0003471802920103073, "learning_rate": 0.00015249600197131651, "loss": 46.0, "step": 979 }, { "epoch": 0.13382950394319074, "grad_norm": 0.0015930512454360723, "learning_rate": 0.0001524065451186095, "loss": 46.0, "step": 980 }, { "epoch": 0.13396606466150013, "grad_norm": 0.00028746266616508365, "learning_rate": 0.0001523170304106413, "loss": 46.0, "step": 981 }, { "epoch": 0.1341026253798095, "grad_norm": 0.0005899532698094845, "learning_rate": 0.0001522274579462334, "loss": 46.0, "step": 982 }, { "epoch": 0.13423918609811888, "grad_norm": 0.0005316737224347889, "learning_rate": 0.00015213782782427123, "loss": 46.0, "step": 983 }, { "epoch": 0.13437574681642825, "grad_norm": 0.00045934764784760773, "learning_rate": 0.00015204814014370372, "loss": 46.0, "step": 984 }, { "epoch": 0.13451230753473764, "grad_norm": 0.001285345759242773, "learning_rate": 0.00015195839500354335, "loss": 46.0, "step": 985 }, { "epoch": 0.134648868253047, "grad_norm": 0.0005873920163139701, "learning_rate": 0.00015186859250286615, "loss": 46.0, "step": 986 }, { "epoch": 0.1347854289713564, "grad_norm": 0.0011875568889081478, "learning_rate": 0.00015177873274081137, "loss": 46.0, "step": 987 }, { "epoch": 0.13492198968966576, "grad_norm": 0.001056193490512669, "learning_rate": 0.00015168881581658147, "loss": 46.0, "step": 988 }, { "epoch": 0.13505855040797515, "grad_norm": 0.0009340514661744237, "learning_rate": 0.00015159884182944211, "loss": 46.0, "step": 989 }, { "epoch": 0.13519511112628452, "grad_norm": 0.0011555576929822564, "learning_rate": 0.00015150881087872185, "loss": 46.0, "step": 990 }, { "epoch": 0.1353316718445939, "grad_norm": 0.001084683695808053, "learning_rate": 0.00015141872306381215, "loss": 46.0, "step": 991 }, { "epoch": 0.13546823256290327, "grad_norm": 0.000519106222782284, "learning_rate": 0.00015132857848416733, "loss": 46.0, "step": 992 }, { "epoch": 0.13560479328121267, "grad_norm": 0.001280359923839569, "learning_rate": 0.00015123837723930424, "loss": 46.0, "step": 993 }, { "epoch": 0.13574135399952203, "grad_norm": 0.0007028987165540457, "learning_rate": 0.00015114811942880242, "loss": 46.0, "step": 994 }, { "epoch": 0.13587791471783142, "grad_norm": 0.0005714019644074142, "learning_rate": 0.00015105780515230376, "loss": 46.0, "step": 995 }, { "epoch": 0.13601447543614079, "grad_norm": 0.0009716853965073824, "learning_rate": 0.00015096743450951258, "loss": 46.0, "step": 996 }, { "epoch": 0.13615103615445018, "grad_norm": 0.0006497084395959973, "learning_rate": 0.00015087700760019532, "loss": 46.0, "step": 997 }, { "epoch": 0.13628759687275954, "grad_norm": 0.0013521420769393444, "learning_rate": 0.00015078652452418063, "loss": 46.0, "step": 998 }, { "epoch": 0.13642415759106893, "grad_norm": 0.0008505037403665483, "learning_rate": 0.00015069598538135906, "loss": 46.0, "step": 999 }, { "epoch": 0.1365607183093783, "grad_norm": 0.0011916455114260316, "learning_rate": 0.00015060539027168316, "loss": 46.0, "step": 1000 }, { "epoch": 0.1366972790276877, "grad_norm": 0.0005008620209991932, "learning_rate": 0.00015051473929516722, "loss": 46.0, "step": 1001 }, { "epoch": 0.13683383974599705, "grad_norm": 0.0006068138754926622, "learning_rate": 0.00015042403255188723, "loss": 46.0, "step": 1002 }, { "epoch": 0.13697040046430645, "grad_norm": 0.0003954765561502427, "learning_rate": 0.00015033327014198075, "loss": 46.0, "step": 1003 }, { "epoch": 0.1371069611826158, "grad_norm": 0.0005776135949417949, "learning_rate": 0.00015024245216564667, "loss": 46.0, "step": 1004 }, { "epoch": 0.1372435219009252, "grad_norm": 0.0008297267486341298, "learning_rate": 0.00015015157872314542, "loss": 46.0, "step": 1005 }, { "epoch": 0.13738008261923457, "grad_norm": 0.000555426231585443, "learning_rate": 0.00015006064991479853, "loss": 46.0, "step": 1006 }, { "epoch": 0.13751664333754396, "grad_norm": 0.0002457842347212136, "learning_rate": 0.0001499696658409887, "loss": 46.0, "step": 1007 }, { "epoch": 0.13765320405585332, "grad_norm": 0.0005408066790550947, "learning_rate": 0.00014987862660215966, "loss": 46.0, "step": 1008 }, { "epoch": 0.13778976477416272, "grad_norm": 0.0010539703071117401, "learning_rate": 0.00014978753229881594, "loss": 46.0, "step": 1009 }, { "epoch": 0.13792632549247208, "grad_norm": 0.00045947683975100517, "learning_rate": 0.00014969638303152295, "loss": 46.0, "step": 1010 }, { "epoch": 0.13806288621078147, "grad_norm": 0.0004952670424245298, "learning_rate": 0.0001496051789009068, "loss": 46.0, "step": 1011 }, { "epoch": 0.13819944692909084, "grad_norm": 0.0006827415782026947, "learning_rate": 0.00014951392000765411, "loss": 46.0, "step": 1012 }, { "epoch": 0.13833600764740023, "grad_norm": 0.000486463715787977, "learning_rate": 0.000149422606452512, "loss": 46.0, "step": 1013 }, { "epoch": 0.1384725683657096, "grad_norm": 0.0002832711033988744, "learning_rate": 0.00014933123833628785, "loss": 46.0, "step": 1014 }, { "epoch": 0.13860912908401898, "grad_norm": 0.0004940008511766791, "learning_rate": 0.00014923981575984936, "loss": 46.0, "step": 1015 }, { "epoch": 0.13874568980232835, "grad_norm": 0.0008655837154947221, "learning_rate": 0.00014914833882412435, "loss": 46.0, "step": 1016 }, { "epoch": 0.13888225052063774, "grad_norm": 0.0007375786663033068, "learning_rate": 0.00014905680763010058, "loss": 46.0, "step": 1017 }, { "epoch": 0.1390188112389471, "grad_norm": 0.0006843106239102781, "learning_rate": 0.00014896522227882578, "loss": 46.0, "step": 1018 }, { "epoch": 0.1391553719572565, "grad_norm": 0.001130104181356728, "learning_rate": 0.00014887358287140744, "loss": 46.0, "step": 1019 }, { "epoch": 0.13929193267556586, "grad_norm": 0.0008909485186450183, "learning_rate": 0.00014878188950901276, "loss": 46.0, "step": 1020 }, { "epoch": 0.13942849339387525, "grad_norm": 0.005642786156386137, "learning_rate": 0.0001486901422928684, "loss": 46.0, "step": 1021 }, { "epoch": 0.13956505411218462, "grad_norm": 0.0005347135593183339, "learning_rate": 0.0001485983413242606, "loss": 46.0, "step": 1022 }, { "epoch": 0.139701614830494, "grad_norm": 0.000356485164957121, "learning_rate": 0.00014850648670453493, "loss": 46.0, "step": 1023 }, { "epoch": 0.13983817554880337, "grad_norm": 0.0006373700452968478, "learning_rate": 0.00014841457853509606, "loss": 46.0, "step": 1024 }, { "epoch": 0.13997473626711276, "grad_norm": 0.00020091190526727587, "learning_rate": 0.0001483226169174079, "loss": 46.0, "step": 1025 }, { "epoch": 0.14011129698542216, "grad_norm": 0.00042303564259782434, "learning_rate": 0.00014823060195299337, "loss": 46.0, "step": 1026 }, { "epoch": 0.14024785770373152, "grad_norm": 0.0004543966497294605, "learning_rate": 0.00014813853374343419, "loss": 46.0, "step": 1027 }, { "epoch": 0.1403844184220409, "grad_norm": 0.00042528280755504966, "learning_rate": 0.00014804641239037097, "loss": 46.0, "step": 1028 }, { "epoch": 0.14052097914035028, "grad_norm": 0.0005864663980901241, "learning_rate": 0.00014795423799550284, "loss": 46.0, "step": 1029 }, { "epoch": 0.14065753985865967, "grad_norm": 0.0005385400145314634, "learning_rate": 0.00014786201066058766, "loss": 46.0, "step": 1030 }, { "epoch": 0.14079410057696903, "grad_norm": 0.0011653905967250466, "learning_rate": 0.00014776973048744165, "loss": 46.0, "step": 1031 }, { "epoch": 0.14093066129527843, "grad_norm": 0.0004561395035125315, "learning_rate": 0.0001476773975779393, "loss": 46.0, "step": 1032 }, { "epoch": 0.1410672220135878, "grad_norm": 0.000643297506030649, "learning_rate": 0.00014758501203401348, "loss": 46.0, "step": 1033 }, { "epoch": 0.14120378273189718, "grad_norm": 0.0006550021353177726, "learning_rate": 0.00014749257395765502, "loss": 46.0, "step": 1034 }, { "epoch": 0.14134034345020655, "grad_norm": 0.0007623559795320034, "learning_rate": 0.0001474000834509128, "loss": 46.0, "step": 1035 }, { "epoch": 0.14147690416851594, "grad_norm": 0.000623580242972821, "learning_rate": 0.00014730754061589355, "loss": 46.0, "step": 1036 }, { "epoch": 0.1416134648868253, "grad_norm": 0.0010572531027719378, "learning_rate": 0.00014721494555476188, "loss": 46.0, "step": 1037 }, { "epoch": 0.1417500256051347, "grad_norm": 0.0011201991001144052, "learning_rate": 0.00014712229836973988, "loss": 46.0, "step": 1038 }, { "epoch": 0.14188658632344406, "grad_norm": 0.001960804220288992, "learning_rate": 0.00014702959916310736, "loss": 46.0, "step": 1039 }, { "epoch": 0.14202314704175345, "grad_norm": 0.0050703976303339005, "learning_rate": 0.00014693684803720138, "loss": 46.0, "step": 1040 }, { "epoch": 0.14215970776006281, "grad_norm": 0.00040476518915966153, "learning_rate": 0.0001468440450944165, "loss": 46.0, "step": 1041 }, { "epoch": 0.1422962684783722, "grad_norm": 0.0007858510361984372, "learning_rate": 0.00014675119043720437, "loss": 46.0, "step": 1042 }, { "epoch": 0.14243282919668157, "grad_norm": 0.0007758094579912722, "learning_rate": 0.0001466582841680737, "loss": 46.0, "step": 1043 }, { "epoch": 0.14256938991499096, "grad_norm": 0.0008653284166939557, "learning_rate": 0.00014656532638959035, "loss": 46.0, "step": 1044 }, { "epoch": 0.14270595063330033, "grad_norm": 0.0004421341873239726, "learning_rate": 0.00014647231720437686, "loss": 46.0, "step": 1045 }, { "epoch": 0.14284251135160972, "grad_norm": 0.0008486118167638779, "learning_rate": 0.0001463792567151126, "loss": 46.0, "step": 1046 }, { "epoch": 0.14297907206991908, "grad_norm": 0.000525649928022176, "learning_rate": 0.0001462861450245336, "loss": 46.0, "step": 1047 }, { "epoch": 0.14311563278822848, "grad_norm": 0.0017683632904663682, "learning_rate": 0.00014619298223543235, "loss": 46.0, "step": 1048 }, { "epoch": 0.14325219350653784, "grad_norm": 0.0012217290932312608, "learning_rate": 0.00014609976845065783, "loss": 46.0, "step": 1049 }, { "epoch": 0.14338875422484723, "grad_norm": 0.0011137262918055058, "learning_rate": 0.00014600650377311522, "loss": 46.0, "step": 1050 }, { "epoch": 0.1435253149431566, "grad_norm": 0.0003707687428686768, "learning_rate": 0.00014591318830576598, "loss": 46.0, "step": 1051 }, { "epoch": 0.143661875661466, "grad_norm": 0.000414914742577821, "learning_rate": 0.0001458198221516276, "loss": 46.0, "step": 1052 }, { "epoch": 0.14379843637977535, "grad_norm": 0.0008973225485533476, "learning_rate": 0.0001457264054137735, "loss": 46.0, "step": 1053 }, { "epoch": 0.14393499709808474, "grad_norm": 0.00040008637006394565, "learning_rate": 0.000145632938195333, "loss": 46.0, "step": 1054 }, { "epoch": 0.1440715578163941, "grad_norm": 0.0005400644731707871, "learning_rate": 0.0001455394205994911, "loss": 46.0, "step": 1055 }, { "epoch": 0.1442081185347035, "grad_norm": 0.00028061779448762536, "learning_rate": 0.00014544585272948843, "loss": 46.0, "step": 1056 }, { "epoch": 0.14434467925301286, "grad_norm": 0.0007635858491994441, "learning_rate": 0.00014535223468862114, "loss": 46.0, "step": 1057 }, { "epoch": 0.14448123997132226, "grad_norm": 0.0004812279948964715, "learning_rate": 0.00014525856658024076, "loss": 46.0, "step": 1058 }, { "epoch": 0.14461780068963162, "grad_norm": 0.0004690811620093882, "learning_rate": 0.00014516484850775406, "loss": 46.0, "step": 1059 }, { "epoch": 0.144754361407941, "grad_norm": 0.0008514091605320573, "learning_rate": 0.00014507108057462296, "loss": 46.0, "step": 1060 }, { "epoch": 0.14489092212625038, "grad_norm": 0.00031304339063353837, "learning_rate": 0.00014497726288436458, "loss": 46.0, "step": 1061 }, { "epoch": 0.14502748284455977, "grad_norm": 0.00015942241589073092, "learning_rate": 0.00014488339554055073, "loss": 46.0, "step": 1062 }, { "epoch": 0.14516404356286913, "grad_norm": 0.0012250308645889163, "learning_rate": 0.0001447894786468082, "loss": 46.0, "step": 1063 }, { "epoch": 0.14530060428117852, "grad_norm": 0.00036729895509779453, "learning_rate": 0.00014469551230681844, "loss": 46.0, "step": 1064 }, { "epoch": 0.1454371649994879, "grad_norm": 0.000257661915384233, "learning_rate": 0.00014460149662431747, "loss": 46.0, "step": 1065 }, { "epoch": 0.14557372571779728, "grad_norm": 0.000458430964499712, "learning_rate": 0.00014450743170309584, "loss": 46.0, "step": 1066 }, { "epoch": 0.14571028643610665, "grad_norm": 0.0004429294203873724, "learning_rate": 0.00014441331764699836, "loss": 46.0, "step": 1067 }, { "epoch": 0.14584684715441604, "grad_norm": 0.0005923425196669996, "learning_rate": 0.00014431915455992414, "loss": 46.0, "step": 1068 }, { "epoch": 0.1459834078727254, "grad_norm": 0.0003316183283459395, "learning_rate": 0.00014422494254582647, "loss": 46.0, "step": 1069 }, { "epoch": 0.1461199685910348, "grad_norm": 0.0004024969239253551, "learning_rate": 0.0001441306817087125, "loss": 46.0, "step": 1070 }, { "epoch": 0.14625652930934416, "grad_norm": 0.00029238680144771934, "learning_rate": 0.00014403637215264353, "loss": 46.0, "step": 1071 }, { "epoch": 0.14639309002765355, "grad_norm": 0.00044409476686269045, "learning_rate": 0.00014394201398173437, "loss": 46.0, "step": 1072 }, { "epoch": 0.1465296507459629, "grad_norm": 0.000767083081882447, "learning_rate": 0.00014384760730015364, "loss": 46.0, "step": 1073 }, { "epoch": 0.1466662114642723, "grad_norm": 0.00029706236091442406, "learning_rate": 0.00014375315221212357, "loss": 46.0, "step": 1074 }, { "epoch": 0.14680277218258167, "grad_norm": 0.0004188843595329672, "learning_rate": 0.00014365864882191968, "loss": 46.0, "step": 1075 }, { "epoch": 0.14693933290089106, "grad_norm": 0.0005889105377718806, "learning_rate": 0.0001435640972338709, "loss": 46.0, "step": 1076 }, { "epoch": 0.14707589361920043, "grad_norm": 0.00072791165439412, "learning_rate": 0.00014346949755235944, "loss": 46.0, "step": 1077 }, { "epoch": 0.14721245433750982, "grad_norm": 0.0006283425609581172, "learning_rate": 0.00014337484988182042, "loss": 46.0, "step": 1078 }, { "epoch": 0.14734901505581918, "grad_norm": 0.000486049015307799, "learning_rate": 0.00014328015432674214, "loss": 46.0, "step": 1079 }, { "epoch": 0.14748557577412857, "grad_norm": 0.0007576828938908875, "learning_rate": 0.00014318541099166555, "loss": 46.0, "step": 1080 }, { "epoch": 0.14762213649243794, "grad_norm": 0.0010862492490559816, "learning_rate": 0.00014309061998118454, "loss": 46.0, "step": 1081 }, { "epoch": 0.14775869721074733, "grad_norm": 0.0005167250637896359, "learning_rate": 0.00014299578139994557, "loss": 46.0, "step": 1082 }, { "epoch": 0.1478952579290567, "grad_norm": 0.002369736786931753, "learning_rate": 0.00014290089535264755, "loss": 46.0, "step": 1083 }, { "epoch": 0.1480318186473661, "grad_norm": 0.0003688117431011051, "learning_rate": 0.0001428059619440419, "loss": 46.0, "step": 1084 }, { "epoch": 0.14816837936567545, "grad_norm": 0.0003458712890278548, "learning_rate": 0.00014271098127893218, "loss": 46.0, "step": 1085 }, { "epoch": 0.14830494008398484, "grad_norm": 0.0013860361650586128, "learning_rate": 0.0001426159534621743, "loss": 46.0, "step": 1086 }, { "epoch": 0.1484415008022942, "grad_norm": 0.001035936875268817, "learning_rate": 0.00014252087859867608, "loss": 46.0, "step": 1087 }, { "epoch": 0.1485780615206036, "grad_norm": 0.00632870476692915, "learning_rate": 0.00014242575679339738, "loss": 46.0, "step": 1088 }, { "epoch": 0.14871462223891296, "grad_norm": 0.0006592991994693875, "learning_rate": 0.00014233058815134978, "loss": 46.0, "step": 1089 }, { "epoch": 0.14885118295722236, "grad_norm": 0.0028478745371103287, "learning_rate": 0.00014223537277759666, "loss": 46.0, "step": 1090 }, { "epoch": 0.14898774367553172, "grad_norm": 0.0005855032941326499, "learning_rate": 0.00014214011077725292, "loss": 46.0, "step": 1091 }, { "epoch": 0.1491243043938411, "grad_norm": 0.0023942850530147552, "learning_rate": 0.00014204480225548494, "loss": 46.0, "step": 1092 }, { "epoch": 0.1492608651121505, "grad_norm": 0.0013539772480726242, "learning_rate": 0.00014194944731751058, "loss": 46.0, "step": 1093 }, { "epoch": 0.14939742583045987, "grad_norm": 0.0009589577093720436, "learning_rate": 0.00014185404606859877, "loss": 46.0, "step": 1094 }, { "epoch": 0.14953398654876926, "grad_norm": 0.0003180347557645291, "learning_rate": 0.00014175859861406966, "loss": 46.0, "step": 1095 }, { "epoch": 0.14967054726707862, "grad_norm": 0.0014342650538310409, "learning_rate": 0.00014166310505929434, "loss": 46.0, "step": 1096 }, { "epoch": 0.14980710798538802, "grad_norm": 0.0006643772940151393, "learning_rate": 0.00014156756550969492, "loss": 46.0, "step": 1097 }, { "epoch": 0.14994366870369738, "grad_norm": 0.001409722724929452, "learning_rate": 0.00014147198007074415, "loss": 46.0, "step": 1098 }, { "epoch": 0.15008022942200677, "grad_norm": 0.0008714981959201396, "learning_rate": 0.00014137634884796557, "loss": 46.0, "step": 1099 }, { "epoch": 0.15021679014031614, "grad_norm": 0.00160513399168849, "learning_rate": 0.00014128067194693316, "loss": 46.0, "step": 1100 }, { "epoch": 0.15035335085862553, "grad_norm": 0.0004049557028338313, "learning_rate": 0.0001411849494732713, "loss": 46.0, "step": 1101 }, { "epoch": 0.1504899115769349, "grad_norm": 0.0005186764756217599, "learning_rate": 0.00014108918153265485, "loss": 46.0, "step": 1102 }, { "epoch": 0.15062647229524428, "grad_norm": 0.002497048582881689, "learning_rate": 0.00014099336823080865, "loss": 46.0, "step": 1103 }, { "epoch": 0.15076303301355365, "grad_norm": 0.00020665116608142853, "learning_rate": 0.00014089750967350781, "loss": 46.0, "step": 1104 }, { "epoch": 0.15089959373186304, "grad_norm": 0.0005294004804454744, "learning_rate": 0.0001408016059665773, "loss": 46.0, "step": 1105 }, { "epoch": 0.1510361544501724, "grad_norm": 0.00033093662932515144, "learning_rate": 0.00014070565721589195, "loss": 46.0, "step": 1106 }, { "epoch": 0.1511727151684818, "grad_norm": 0.0003122551424894482, "learning_rate": 0.00014060966352737628, "loss": 46.0, "step": 1107 }, { "epoch": 0.15130927588679116, "grad_norm": 0.0006798842805437744, "learning_rate": 0.00014051362500700447, "loss": 46.0, "step": 1108 }, { "epoch": 0.15144583660510055, "grad_norm": 0.0005911933840252459, "learning_rate": 0.00014041754176080017, "loss": 46.0, "step": 1109 }, { "epoch": 0.15158239732340992, "grad_norm": 0.0010168857406824827, "learning_rate": 0.00014032141389483648, "loss": 46.0, "step": 1110 }, { "epoch": 0.1517189580417193, "grad_norm": 0.00033409081515856087, "learning_rate": 0.00014022524151523563, "loss": 46.0, "step": 1111 }, { "epoch": 0.15185551876002867, "grad_norm": 0.0002926045854110271, "learning_rate": 0.00014012902472816907, "loss": 46.0, "step": 1112 }, { "epoch": 0.15199207947833807, "grad_norm": 0.0013335029361769557, "learning_rate": 0.00014003276363985727, "loss": 46.0, "step": 1113 }, { "epoch": 0.15212864019664743, "grad_norm": 0.000854438403621316, "learning_rate": 0.00013993645835656953, "loss": 46.0, "step": 1114 }, { "epoch": 0.15226520091495682, "grad_norm": 0.0009656418114900589, "learning_rate": 0.00013984010898462416, "loss": 46.0, "step": 1115 }, { "epoch": 0.1524017616332662, "grad_norm": 0.00042812732863239944, "learning_rate": 0.00013974371563038785, "loss": 46.0, "step": 1116 }, { "epoch": 0.15253832235157558, "grad_norm": 0.0006366227171383798, "learning_rate": 0.00013964727840027604, "loss": 46.0, "step": 1117 }, { "epoch": 0.15267488306988494, "grad_norm": 0.0011450116289779544, "learning_rate": 0.00013955079740075256, "loss": 46.0, "step": 1118 }, { "epoch": 0.15281144378819433, "grad_norm": 0.00023903950932435691, "learning_rate": 0.00013945427273832954, "loss": 46.0, "step": 1119 }, { "epoch": 0.1529480045065037, "grad_norm": 0.0011047361185774207, "learning_rate": 0.0001393577045195673, "loss": 46.0, "step": 1120 }, { "epoch": 0.1530845652248131, "grad_norm": 0.0004785344353877008, "learning_rate": 0.0001392610928510743, "loss": 46.0, "step": 1121 }, { "epoch": 0.15322112594312245, "grad_norm": 0.0008018110529519618, "learning_rate": 0.00013916443783950694, "loss": 46.0, "step": 1122 }, { "epoch": 0.15335768666143185, "grad_norm": 0.0012497154530137777, "learning_rate": 0.00013906773959156948, "loss": 46.0, "step": 1123 }, { "epoch": 0.1534942473797412, "grad_norm": 0.00042997964192181826, "learning_rate": 0.00013897099821401384, "loss": 46.0, "step": 1124 }, { "epoch": 0.1536308080980506, "grad_norm": 0.0003085716161876917, "learning_rate": 0.00013887421381363968, "loss": 46.0, "step": 1125 }, { "epoch": 0.15376736881635997, "grad_norm": 0.000396199116948992, "learning_rate": 0.00013877738649729405, "loss": 46.0, "step": 1126 }, { "epoch": 0.15390392953466936, "grad_norm": 0.0006908946088515222, "learning_rate": 0.00013868051637187144, "loss": 46.0, "step": 1127 }, { "epoch": 0.15404049025297872, "grad_norm": 0.0003736602666322142, "learning_rate": 0.00013858360354431355, "loss": 46.0, "step": 1128 }, { "epoch": 0.15417705097128812, "grad_norm": 0.0006938680890016258, "learning_rate": 0.00013848664812160925, "loss": 46.0, "step": 1129 }, { "epoch": 0.15431361168959748, "grad_norm": 0.0005900769028812647, "learning_rate": 0.00013838965021079446, "loss": 46.0, "step": 1130 }, { "epoch": 0.15445017240790687, "grad_norm": 0.002096734009683132, "learning_rate": 0.00013829260991895197, "loss": 46.0, "step": 1131 }, { "epoch": 0.15458673312621624, "grad_norm": 0.0011866495478898287, "learning_rate": 0.00013819552735321134, "loss": 46.0, "step": 1132 }, { "epoch": 0.15472329384452563, "grad_norm": 0.00037106405943632126, "learning_rate": 0.00013809840262074885, "loss": 46.0, "step": 1133 }, { "epoch": 0.154859854562835, "grad_norm": 0.0006705286214128137, "learning_rate": 0.0001380012358287873, "loss": 46.0, "step": 1134 }, { "epoch": 0.15499641528114438, "grad_norm": 0.00040015694685280323, "learning_rate": 0.0001379040270845959, "loss": 46.0, "step": 1135 }, { "epoch": 0.15513297599945375, "grad_norm": 0.0005712392157875001, "learning_rate": 0.00013780677649549025, "loss": 46.0, "step": 1136 }, { "epoch": 0.15526953671776314, "grad_norm": 0.00047067005652934313, "learning_rate": 0.00013770948416883205, "loss": 46.0, "step": 1137 }, { "epoch": 0.1554060974360725, "grad_norm": 0.0008378413622267544, "learning_rate": 0.00013761215021202916, "loss": 46.0, "step": 1138 }, { "epoch": 0.1555426581543819, "grad_norm": 0.0007883374928496778, "learning_rate": 0.00013751477473253533, "loss": 46.0, "step": 1139 }, { "epoch": 0.15567921887269126, "grad_norm": 0.0012233637971803546, "learning_rate": 0.0001374173578378502, "loss": 46.0, "step": 1140 }, { "epoch": 0.15581577959100065, "grad_norm": 0.004130385350435972, "learning_rate": 0.00013731989963551913, "loss": 46.0, "step": 1141 }, { "epoch": 0.15595234030931002, "grad_norm": 0.0029437027405947447, "learning_rate": 0.00013722240023313306, "loss": 46.0, "step": 1142 }, { "epoch": 0.1560889010276194, "grad_norm": 0.0003660391375888139, "learning_rate": 0.00013712485973832838, "loss": 46.0, "step": 1143 }, { "epoch": 0.15622546174592877, "grad_norm": 0.0020895125344395638, "learning_rate": 0.00013702727825878693, "loss": 46.0, "step": 1144 }, { "epoch": 0.15636202246423817, "grad_norm": 0.0016986053669825196, "learning_rate": 0.00013692965590223573, "loss": 46.0, "step": 1145 }, { "epoch": 0.15649858318254753, "grad_norm": 0.0005671007093042135, "learning_rate": 0.00013683199277644693, "loss": 46.0, "step": 1146 }, { "epoch": 0.15663514390085692, "grad_norm": 0.0009818869875743985, "learning_rate": 0.00013673428898923774, "loss": 46.0, "step": 1147 }, { "epoch": 0.15677170461916629, "grad_norm": 0.0004315339319873601, "learning_rate": 0.00013663654464847022, "loss": 46.0, "step": 1148 }, { "epoch": 0.15690826533747568, "grad_norm": 0.0006189457490108907, "learning_rate": 0.0001365387598620512, "loss": 46.0, "step": 1149 }, { "epoch": 0.15704482605578504, "grad_norm": 0.0007538103964179754, "learning_rate": 0.00013644093473793215, "loss": 46.0, "step": 1150 }, { "epoch": 0.15718138677409443, "grad_norm": 0.0005718530155718327, "learning_rate": 0.00013634306938410911, "loss": 46.0, "step": 1151 }, { "epoch": 0.1573179474924038, "grad_norm": 0.0006374249351210892, "learning_rate": 0.00013624516390862244, "loss": 46.0, "step": 1152 }, { "epoch": 0.1574545082107132, "grad_norm": 0.0007302735466510057, "learning_rate": 0.00013614721841955692, "loss": 46.0, "step": 1153 }, { "epoch": 0.15759106892902255, "grad_norm": 0.0005098398542031646, "learning_rate": 0.00013604923302504147, "loss": 46.0, "step": 1154 }, { "epoch": 0.15772762964733195, "grad_norm": 0.0005700102774426341, "learning_rate": 0.00013595120783324902, "loss": 46.0, "step": 1155 }, { "epoch": 0.1578641903656413, "grad_norm": 0.00040146647370420396, "learning_rate": 0.00013585314295239644, "loss": 46.0, "step": 1156 }, { "epoch": 0.1580007510839507, "grad_norm": 0.0008773392182774842, "learning_rate": 0.00013575503849074444, "loss": 46.0, "step": 1157 }, { "epoch": 0.15813731180226007, "grad_norm": 0.0003678193024825305, "learning_rate": 0.0001356568945565974, "loss": 46.0, "step": 1158 }, { "epoch": 0.15827387252056946, "grad_norm": 0.0013553998433053493, "learning_rate": 0.0001355587112583033, "loss": 46.0, "step": 1159 }, { "epoch": 0.15841043323887882, "grad_norm": 0.0009887435007840395, "learning_rate": 0.00013546048870425356, "loss": 46.0, "step": 1160 }, { "epoch": 0.15854699395718821, "grad_norm": 0.0019033915596082807, "learning_rate": 0.00013536222700288303, "loss": 46.0, "step": 1161 }, { "epoch": 0.1586835546754976, "grad_norm": 0.0006128349923528731, "learning_rate": 0.00013526392626266956, "loss": 46.0, "step": 1162 }, { "epoch": 0.15882011539380697, "grad_norm": 0.0005847832653671503, "learning_rate": 0.00013516558659213432, "loss": 46.0, "step": 1163 }, { "epoch": 0.15895667611211636, "grad_norm": 0.0005258521996438503, "learning_rate": 0.00013506720809984137, "loss": 46.0, "step": 1164 }, { "epoch": 0.15909323683042573, "grad_norm": 0.00040099999750964344, "learning_rate": 0.0001349687908943976, "loss": 46.0, "step": 1165 }, { "epoch": 0.15922979754873512, "grad_norm": 0.0007036002352833748, "learning_rate": 0.0001348703350844527, "loss": 46.0, "step": 1166 }, { "epoch": 0.15936635826704448, "grad_norm": 0.00037679230445064604, "learning_rate": 0.00013477184077869892, "loss": 46.0, "step": 1167 }, { "epoch": 0.15950291898535388, "grad_norm": 0.0005223838961683214, "learning_rate": 0.000134673308085871, "loss": 46.0, "step": 1168 }, { "epoch": 0.15963947970366324, "grad_norm": 0.0008007617434486747, "learning_rate": 0.0001345747371147461, "loss": 46.0, "step": 1169 }, { "epoch": 0.15977604042197263, "grad_norm": 0.000517090258654207, "learning_rate": 0.0001344761279741437, "loss": 46.0, "step": 1170 }, { "epoch": 0.159912601140282, "grad_norm": 0.0008526128367520869, "learning_rate": 0.0001343774807729253, "loss": 46.0, "step": 1171 }, { "epoch": 0.1600491618585914, "grad_norm": 0.0011307375971227884, "learning_rate": 0.0001342787956199945, "loss": 46.0, "step": 1172 }, { "epoch": 0.16018572257690075, "grad_norm": 0.00033380292006768286, "learning_rate": 0.00013418007262429668, "loss": 46.0, "step": 1173 }, { "epoch": 0.16032228329521014, "grad_norm": 0.000278162129689008, "learning_rate": 0.00013408131189481911, "loss": 46.0, "step": 1174 }, { "epoch": 0.1604588440135195, "grad_norm": 0.00258398219011724, "learning_rate": 0.00013398251354059077, "loss": 46.0, "step": 1175 }, { "epoch": 0.1605954047318289, "grad_norm": 0.0006168753025121987, "learning_rate": 0.000133883677670682, "loss": 46.0, "step": 1176 }, { "epoch": 0.16073196545013826, "grad_norm": 0.00029901755624450743, "learning_rate": 0.0001337848043942047, "loss": 46.0, "step": 1177 }, { "epoch": 0.16086852616844766, "grad_norm": 0.000997790601104498, "learning_rate": 0.00013368589382031196, "loss": 46.0, "step": 1178 }, { "epoch": 0.16100508688675702, "grad_norm": 0.0006558905588462949, "learning_rate": 0.00013358694605819814, "loss": 46.0, "step": 1179 }, { "epoch": 0.1611416476050664, "grad_norm": 0.0006217307527549565, "learning_rate": 0.00013348796121709862, "loss": 46.0, "step": 1180 }, { "epoch": 0.16127820832337578, "grad_norm": 0.0004898210754618049, "learning_rate": 0.00013338893940628973, "loss": 46.0, "step": 1181 }, { "epoch": 0.16141476904168517, "grad_norm": 0.0009382545249536633, "learning_rate": 0.00013328988073508852, "loss": 46.0, "step": 1182 }, { "epoch": 0.16155132975999453, "grad_norm": 0.0006358098471537232, "learning_rate": 0.00013319078531285285, "loss": 46.0, "step": 1183 }, { "epoch": 0.16168789047830393, "grad_norm": 0.0012355463113635778, "learning_rate": 0.00013309165324898112, "loss": 46.0, "step": 1184 }, { "epoch": 0.1618244511966133, "grad_norm": 0.0005907994927838445, "learning_rate": 0.00013299248465291214, "loss": 46.0, "step": 1185 }, { "epoch": 0.16196101191492268, "grad_norm": 0.002715210895985365, "learning_rate": 0.00013289327963412513, "loss": 46.0, "step": 1186 }, { "epoch": 0.16209757263323205, "grad_norm": 0.001038241432979703, "learning_rate": 0.00013279403830213942, "loss": 46.0, "step": 1187 }, { "epoch": 0.16223413335154144, "grad_norm": 0.0015468295896425843, "learning_rate": 0.00013269476076651447, "loss": 46.0, "step": 1188 }, { "epoch": 0.1623706940698508, "grad_norm": 0.0017287740483880043, "learning_rate": 0.00013259544713684974, "loss": 46.0, "step": 1189 }, { "epoch": 0.1625072547881602, "grad_norm": 0.0018615883309394121, "learning_rate": 0.00013249609752278454, "loss": 46.0, "step": 1190 }, { "epoch": 0.16264381550646956, "grad_norm": 0.00047380104660987854, "learning_rate": 0.0001323967120339978, "loss": 46.0, "step": 1191 }, { "epoch": 0.16278037622477895, "grad_norm": 0.001483297673985362, "learning_rate": 0.00013229729078020823, "loss": 46.0, "step": 1192 }, { "epoch": 0.16291693694308831, "grad_norm": 0.0004559273656923324, "learning_rate": 0.00013219783387117385, "loss": 46.0, "step": 1193 }, { "epoch": 0.1630534976613977, "grad_norm": 0.0008615512633696198, "learning_rate": 0.00013209834141669213, "loss": 46.0, "step": 1194 }, { "epoch": 0.16319005837970707, "grad_norm": 0.0005908702732995152, "learning_rate": 0.0001319988135265998, "loss": 46.0, "step": 1195 }, { "epoch": 0.16332661909801646, "grad_norm": 0.0008730573463253677, "learning_rate": 0.00013189925031077267, "loss": 46.0, "step": 1196 }, { "epoch": 0.16346317981632583, "grad_norm": 0.0012465447653084993, "learning_rate": 0.00013179965187912554, "loss": 46.0, "step": 1197 }, { "epoch": 0.16359974053463522, "grad_norm": 0.0013986461563035846, "learning_rate": 0.00013170001834161209, "loss": 46.0, "step": 1198 }, { "epoch": 0.16373630125294458, "grad_norm": 0.0011632711393758655, "learning_rate": 0.0001316003498082248, "loss": 46.0, "step": 1199 }, { "epoch": 0.16387286197125397, "grad_norm": 0.001200903090648353, "learning_rate": 0.0001315006463889948, "loss": 46.0, "step": 1200 }, { "epoch": 0.16387286197125397, "eval_loss": 11.5, "eval_runtime": 20.5706, "eval_samples_per_second": 149.923, "eval_steps_per_second": 74.961, "step": 1200 } ], "logging_steps": 1, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 42923841650688.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }